[llvm] [AMDGPU] Add scheduling stage to rewrite MFMA from VGPR to AGPR (PR #170335)
Tony Linthicum via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 2 10:59:34 PST 2025
https://github.com/tlinthic updated https://github.com/llvm/llvm-project/pull/170335
>From 1122614a781b7f81057ce5cace37707650ef6695 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 15 Jul 2025 15:10:41 -0700
Subject: [PATCH 1/8] [AMDGPU] Add scheduling stage to rewrite MFMA from VGPR
to AGPR
Change-Id: I47b2a4274a35f3cf0a6d064674d1d29526e4dfd2
---
.../llvm/CodeGen/MachineInstrBuilder.h | 15 +
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 30 +
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 641 ++
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 70 +-
.../AMDGPU/sched_mfma_rewrite_copies.mir | 5591 +++++++++++++++++
.../AMDGPU/sched_mfma_rewrite_cost.mir | 524 ++
6 files changed, 6866 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir
diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index caeb430d6fd1c..dfa35461e52c1 100644
--- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -459,6 +459,21 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD);
}
+/// This version of the builder inserts the newly-built instruction after the
+/// given position in the given MachineBasicBlock, and does NOT take a
+/// destination register.
+///
+/// \param BB   Block that receives the new instruction.
+/// \param I    Position in \p BB; the new instruction is placed after it.
+/// \param MIMD Supplies the debug location and the PC-sections / MMRA
+///             metadata for the new instruction.
+/// \param MCID Descriptor of the instruction to create.
+/// \returns a builder wrapping the newly inserted instruction.
+inline MachineInstrBuilder BuildMIAfter(MachineBasicBlock &BB,
+                                        MachineBasicBlock::iterator I,
+                                        const MIMetadata &MIMD,
+                                        const MCInstrDesc &MCID) {
+  MachineFunction &MF = *BB.getParent();
+  MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL());
+  BB.insertAfter(I, MI);
+  // Use the same metadata helper as the BuildMI overloads in this header so
+  // all builders stay in sync if MIMetadata grows new fields.
+  return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD);
+}
+
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::instr_iterator I,
const MIMetadata &MIMD,
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index f9d3ce039092e..7ca8ea7be09f0 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -102,6 +102,36 @@ struct GCNRegPressure {
DynamicVGPRBlockSize));
}
+  /// Estimate by how many vector registers this pressure exceeds the limits
+  /// of \p MF on subtarget \p ST.
+  ///
+  /// Two estimates are computed and the larger one is returned:
+  ///  - the excess of the unified VGPR count (getVGPRNum(true)) over
+  ///    ST.getMaxNumVGPRs(MF), and
+  ///  - the sum of the ArchVGPR excess and the AGPR excess over the two
+  ///    limits returned by ST.getMaxNumVectorRegs().
+  ///
+  /// \returns 0 when the subtarget has no GFX90A instructions, otherwise the
+  /// estimate described above.
+  unsigned getVGPRSpills(const GCNSubtarget &ST, MachineFunction &MF) const {
+    if (!ST.hasGFX90AInsts())
+      return 0;
+
+    // Amount by which Pressure exceeds Limit, clamped at zero.
+    auto Excess = [](unsigned Pressure, unsigned Limit) -> unsigned {
+      return Pressure > Limit ? Pressure - Limit : 0;
+    };
+
+    // .first is the ArchVGPR limit, .second is the AGPR limit.
+    auto MaxVectorRegs = ST.getMaxNumVectorRegs(MF.getFunction());
+    unsigned ArchSpill = Excess(getArchVGPRNum(), MaxVectorRegs.first);
+    unsigned AGPRSpill = Excess(getAGPRNum(), MaxVectorRegs.second);
+
+    // GFX90A support is guaranteed by the early return above, so the unified
+    // estimate is always computed.
+    unsigned UnifiedSpill = Excess(getVGPRNum(true), ST.getMaxNumVGPRs(MF));
+
+    return std::max(UnifiedSpill, ArchSpill + AGPRSpill);
+  }
+
void inc(unsigned Reg,
LaneBitmask PrevMask,
LaneBitmask NewMask,
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index c8ce3aab3f303..453e9d28f2a2e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -30,6 +30,7 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/ErrorHandling.h"
@@ -690,6 +691,7 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
const MachineSchedContext *C, bool IsLegacyScheduler)
: GCNSchedStrategy(C) {
SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
+ SchedStages.push_back(GCNSchedStageID::RewriteSchedule);
SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
@@ -946,6 +948,8 @@ GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
switch (SchedStageID) {
case GCNSchedStageID::OccInitialSchedule:
return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
+ case GCNSchedStageID::RewriteSchedule:
+ return std::make_unique<RewriteScheduleStage>(SchedStageID, *this);
case GCNSchedStageID::UnclusteredHighRPReschedule:
return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
case GCNSchedStageID::ClusteredLowOccupancyReschedule:
@@ -1183,6 +1187,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
case GCNSchedStageID::OccInitialSchedule:
OS << "Max Occupancy Initial Schedule";
break;
+ case GCNSchedStageID::RewriteSchedule:
+ OS << "Instruction Rewriting Reschedule";
+ break;
case GCNSchedStageID::UnclusteredHighRPReschedule:
OS << "Unclustered High Register Pressure Reschedule";
break;
@@ -1216,6 +1223,112 @@ bool GCNSchedStage::initGCNSchedStage() {
return true;
}
+/// Collect the slot indexes of the defs that reach \p UseMO into \p DefIdxs.
+///
+/// If the value live at the use is defined somewhere other than at the start
+/// index of its block, that def is the single reaching def. Otherwise the CFG
+/// is walked backwards from the use block, and for each visited block the
+/// value live at the block end is inspected in the same way.
+///
+/// \returns the def index of the value that is live at the use itself.
+SlotIndex
+RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
+                                       LiveIntervals *LIS,
+                                       SmallVectorImpl<SlotIndex> &DefIdxs) {
+  assert(UseMO.isReg());
+  MachineInstr *UseMI = UseMO.getParent();
+  LiveInterval &UseLI = LIS->getInterval(UseMO.getReg());
+  // NOTE(review): getVNInfoAt results are dereferenced unchecked, both here
+  // and in the loop below -- presumably the register is always live at the
+  // queried points; confirm.
+  auto UseVNI = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI));
+  const SlotIndex UseDefIdx = UseVNI->def;
+
+  // A def that is not located at the start index of its block must be the
+  // only reaching def.
+  if (LIS->getMBBStartIdx(LIS->getMBBFromIndex(UseDefIdx)) != UseDefIdx) {
+    DefIdxs.push_back(UseDefIdx);
+    return UseDefIdx;
+  }
+
+  MachineBasicBlock *UseMBB = UseMI->getParent();
+  SmallPtrSet<MachineBasicBlock *, 8> Seen;
+  SmallVector<MachineBasicBlock *, 8> Pending;
+
+  Seen.insert(UseMBB);
+
+  // Seed the backwards walk with the predecessors of the use block.
+  for (auto Pred : UseMBB->predecessors()) {
+    Pending.push_back(Pred);
+    Seen.insert(Pred);
+  }
+
+  while (!Pending.empty()) {
+    MachineBasicBlock *MBB = Pending.pop_back_val();
+
+    // Look at the value that is live at the very end of this block.
+    auto LiveOutVNI = UseLI.getVNInfoAt(LIS->getMBBEndIdx(MBB).getPrevSlot());
+    const SlotIndex LiveOutDefIdx = LiveOutVNI->def;
+    MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(LiveOutDefIdx);
+
+    // The value is defined away from a block start: this is the reaching def
+    // along this path, so stop walking here.
+    if (LIS->getMBBStartIdx(DefMBB) != LiveOutDefIdx) {
+      DefIdxs.push_back(LiveOutDefIdx);
+      continue;
+    }
+
+    // Otherwise keep walking through the not-yet-visited predecessors of the
+    // block holding the block-start def.
+    for (auto Pred : DefMBB->predecessors()) {
+      if (Seen.insert(Pred).second)
+        Pending.push_back(Pred);
+    }
+  }
+
+  return UseDefIdx;
+}
+
+/// Collect into \p ReachingUses every non-debug use operand of the register
+/// defined by operand 0 of \p DefMI whose reaching defs include \p DefMI.
+void RewriteScheduleStage::findReachingUses(
+    MachineInstr *DefMI, LiveIntervals *LIS,
+    SmallVectorImpl<MachineOperand *> &ReachingUses) {
+  const SlotIndex DefIdx = LIS->getInstructionIndex(*DefMI);
+  const Register DefReg = DefMI->getOperand(0).getReg();
+
+  for (MachineOperand &UseMO : DAG.MRI.use_nodbg_operands(DefReg)) {
+    SmallVector<SlotIndex, 8> DefsOfUse;
+    findReachingDefs(UseMO, LIS, DefsOfUse);
+
+    // The use is a reaching use when one of its reaching defs is DefMI.
+    const bool ReachedByDefMI =
+        llvm::any_of(DefsOfUse, [DefIdx](SlotIndex Idx) {
+          return SlotIndex::isSameInstr(Idx, DefIdx);
+        });
+    if (ReachedByDefMI)
+      ReachingUses.push_back(&UseMO);
+  }
+}
+
+/// Decide whether to rewrite MFMAs to their AGPR form and, if so, do it.
+///
+/// The stage only proceeds on subtargets with GFX90A instructions and only
+/// when at least one region's ArchVGPR pressure exceeds the addressable
+/// ArchVGPR count. It then collects candidates (initHeuristics), evaluates
+/// the cost (getRewriteCost) and rewrites when the cost is not positive.
+///
+/// \returns false when the stage is skipped or the rewrite is rejected,
+/// otherwise the result of rewrite().
+bool RewriteScheduleStage::initGCNSchedStage() {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+  // Cheap subtarget check first: no need to scan the regions otherwise.
+  if (!ST.hasGFX90AInsts())
+    return false;
+
+  const unsigned NumRegions = DAG.Regions.size();
+  const unsigned ArchVGPRLimit = ST.getAddressableNumArchVGPRs();
+
+  RegionsWithExcessArchVGPR.resize(NumRegions);
+  RegionsWithExcessArchVGPR.reset();
+  for (unsigned Region = 0; Region < NumRegions; ++Region) {
+    if (DAG.Pressure[Region].getArchVGPRNum() > ArchVGPRLimit)
+      RegionsWithExcessArchVGPR[Region] = true;
+  }
+
+  if (RegionsWithExcessArchVGPR.none())
+    return false;
+
+  TII = ST.getInstrInfo();
+  SRI = ST.getRegisterInfo();
+
+  std::vector<std::pair<MachineInstr *, unsigned>> RewriteCands;
+  DenseMap<MachineBasicBlock *, std::set<Register>> CopyForUse;
+  SmallPtrSet<MachineInstr *, 8> CopyForDef;
+
+  if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef))
+    return false;
+
+  int64_t Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef);
+
+  // If we haven't found the beneficial conditions, prefer the VGPR form which
+  // may result in less cross RC copies.
+  if (Cost > 0)
+    return false;
+
+  return rewrite(RewriteCands);
+}
+
bool UnclusteredHighRPStage::initGCNSchedStage() {
if (DisableUnclusterHighRP)
return false;
@@ -1837,6 +1950,534 @@ void GCNSchedStage::revertScheduling() {
DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}
+/// \returns true if \p MI is an MAI instruction for which
+/// AMDGPU::getMFMASrcCVDstAGPROp reports a replacement opcode (i.e. not -1).
+bool RewriteScheduleStage::isRewriteCandidate(MachineInstr *MI) const {
+  const auto *SII = static_cast<const SIInstrInfo *>(DAG.TII);
+  return SII->isMAI(*MI) &&
+         AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()) != -1;
+}
+
+/// Speculatively switch every rewrite candidate in the function to its
+/// replacement opcode and AGPR register classes, and record where copies
+/// would be required.
+///
+/// \param RewriteCands Receives each rewritten instruction paired with its
+///        original opcode, so that the change can be undone later.
+/// \param CopyForUse   Receives, per basic block, the registers used there by
+///        non-MAI instructions that read an MFMA result.
+/// \param CopyForDef   Receives the non-MAI instructions that are reaching
+///        defs of an MFMA src2 register or of a non-MAI use of an MFMA result.
+/// \returns true (unconditionally, as currently written).
+bool RewriteScheduleStage::initHeuristics(
+    std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
+    DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
+    SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
+  // Prepare for the heuristics
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (!isRewriteCandidate(&MI))
+        continue;
+
+      // isRewriteCandidate only accepts opcodes that have a replacement, so
+      // the lookup cannot yield -1 here.
+      int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
+      assert(ReplacementOp != -1 && "rewrite candidate without replacement");
+
+      RewriteCands.push_back({&MI, MI.getOpcode()});
+      MI.setDesc(TII->get(ReplacementOp));
+
+      MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+      assert(Src2 && "expected a src2 operand on a rewrite candidate");
+      if (Src2->isReg()) {
+        SmallVector<SlotIndex, 8> Src2ReachingDefs;
+        findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
+
+        // For any definition of the src2 register which is non-MFMA, we
+        // insert a copy.
+        for (SlotIndex RDIdx : Src2ReachingDefs) {
+          MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx);
+          if (!TII->isMAI(*RD))
+            CopyForDef.insert(RD);
+        }
+      }
+
+      MachineOperand &Dst = MI.getOperand(0);
+      SmallVector<MachineOperand *, 8> DstReachingUses;
+
+      findReachingUses(&MI, DAG.LIS, DstReachingUses);
+
+      for (MachineOperand *RUOp : DstReachingUses) {
+        if (TII->isMAI(*RUOp->getParent()))
+          continue;
+
+        // For any user of the result of the MFMA which is not an MFMA, we
+        // insert a copy. For a given register, we will only insert one copy
+        // per user block.
+        CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg());
+
+        SmallVector<SlotIndex, 8> DstUsesReachingDefs;
+        findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs);
+
+        for (auto RDIndex : DstUsesReachingDefs) {
+          MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);
+          if (TII->isMAI(*RD))
+            continue;
+
+          // For any definition of the user of the MFMA which is not an MFMA,
+          // we insert a copy. We do this to transform all the reaching defs
+          // of this use to AGPR. By doing this, we can insert a copy from
+          // AGPR to VGPR at the user rather than after the MFMA.
+          CopyForDef.insert(RD);
+        }
+      }
+
+      // Do the rewrite to allow for updated RP calculation.
+      const TargetRegisterClass *VGPRRC = DAG.MRI.getRegClass(Dst.getReg());
+      const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(VGPRRC);
+      DAG.MRI.setRegClass(Dst.getReg(), AGPRRC);
+      if (Src2->isReg())
+        DAG.MRI.setRegClass(Src2->getReg(), AGPRRC);
+    }
+  }
+
+  return true;
+}
+
+/// Compute the cost of the speculative rewrite done by initHeuristics and
+/// undo that rewrite before returning.
+///
+/// The cost is the most negative per-region spill-cost change among the
+/// regions flagged in RegionsWithExcessArchVGPR, plus the estimated cost of
+/// the copies recorded in \p CopyForDef and \p CopyForUse. Both parts are
+/// scaled by block frequency relative to the entry block. If any flagged
+/// region shows an increased spill cost, that (positive) cost is returned
+/// immediately.
+///
+/// \param RewriteCands Instructions to restore, paired with their original
+///        opcodes.
+/// \param CopyForUse   Per-block registers that would need a copy at a use.
+/// \param CopyForDef   Instructions after which a copy would be needed.
+/// \returns the cost; a value greater than zero means the rewrite is not
+///          considered beneficial by the caller.
+int64_t RewriteScheduleStage::getRewriteCost(
+    std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
+    DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
+    SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
+  // Reset to the vgpr form. We must do rewriting after copy-insertion, as some
+  // defs of the register may require VGPR. This has to run on every exit
+  // path: the caller only performs the real rewrite when the cost is not
+  // positive, so no path may leave the speculative AGPR form behind.
+  auto ResetToVGPRForm = [&]() {
+    for (auto &RI : RewriteCands) {
+      MachineInstr *MI = RI.first;
+
+      assert(TII->isMAI(*MI));
+      const TargetRegisterClass *AGPRRC =
+          DAG.MRI.getRegClass(MI->getOperand(0).getReg());
+      const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(AGPRRC);
+
+      MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
+      assert(Src2);
+
+      if (Src2->isReg())
+        DAG.MRI.setRegClass(Src2->getReg(), VGPRRC);
+      DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VGPRRC);
+      MI->setDesc(TII->get(RI.second));
+    }
+  };
+
+  MBFI.calculate(MF, MBPI, *DAG.MLI);
+  const uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
+  int64_t BestSpillCost = 0;
+
+  for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) {
+    if (!RegionsWithExcessArchVGPR[Region])
+      continue;
+
+    auto PressureBefore = DAG.Pressure[Region];
+    unsigned SpillCostBefore = PressureBefore.getVGPRSpills(ST, MF);
+
+    // For the cases we care about (i.e. ArchVGPR usage is greater than the
+    // addressable limit), rewriting alone should bring pressure to manageable
+    // level. If we find any such region, then the rewrite is potentially
+    // beneficial.
+    auto PressureAfter = DAG.getRealRegPressure(Region);
+    unsigned SpillCostAfter = PressureAfter.getVGPRSpills(ST, MF);
+
+    uint64_t BlockFreq =
+        MBFI.getBlockFreq(DAG.Regions[Region].first->getParent())
+            .getFrequency();
+
+    // The relative frequency is kept as an integer ratio >= 1, together with
+    // a flag saying whether it divides or multiplies the cost.
+    bool RelativeFreqIsDenom = EntryFreq > BlockFreq;
+    uint64_t RelativeFreq = EntryFreq && BlockFreq
+                                ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq
+                                                       : BlockFreq / EntryFreq)
+                                : 1;
+
+    // This assumes perfect spilling / splitting -- using one spill / copy
+    // instruction and one restore / copy for each excess register.
+    int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2;
+
+    // Also account for the block frequency.
+    if (RelativeFreqIsDenom)
+      SpillCost /= (int64_t)RelativeFreq;
+    else
+      SpillCost *= (int64_t)RelativeFreq;
+
+    // If we have increased spilling in any block, just bail -- but not
+    // before undoing the speculative rewrite.
+    if (SpillCost > 0) {
+      ResetToVGPRForm();
+      return SpillCost;
+    }
+
+    if (SpillCost < BestSpillCost)
+      BestSpillCost = SpillCost;
+  }
+
+  // Set the cost to the largest decrease in spill cost in order to not double
+  // count spill reductions.
+  int64_t Cost = BestSpillCost;
+
+  assert(Cost <= 0);
+
+  // 64-bit accumulator: the per-copy terms below are 64-bit products.
+  uint64_t CopyCost = 0;
+
+  // For each CopyForDef, increase the cost by the register size while
+  // accounting for block frequency.
+  for (auto *DefMI : CopyForDef) {
+    auto DefReg = DefMI->getOperand(0).getReg();
+    uint64_t DefFreq =
+        EntryFreq
+            ? MBFI.getBlockFreq(DefMI->getParent()).getFrequency() / EntryFreq
+            : 1;
+
+    unsigned RegSize = DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(DefReg));
+    unsigned NumRegs = std::max(RegSize / 32, (unsigned)1);
+    CopyCost += NumRegs * DefFreq;
+  }
+
+  // Account for CopyForUse copies in each block that the register is used.
+  for (auto &UseEntry : CopyForUse) {
+    uint64_t UseFreq =
+        EntryFreq ? MBFI.getBlockFreq(UseEntry.first).getFrequency() / EntryFreq
+                  : 1;
+
+    for (auto UseReg : UseEntry.second) {
+      unsigned RegSize =
+          DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(UseReg));
+      unsigned NumRegs = std::max(RegSize / 32, (unsigned)1);
+      CopyCost += NumRegs * UseFreq;
+    }
+  }
+
+  Cost += static_cast<int64_t>(CopyCost);
+
+  ResetToVGPRForm();
+
+  return Cost;
+}
+
+/// Do the final rewrite of the MFMAs in \p RewriteCands to their AGPR form,
+/// insert the cross register class copies this requires, reclassify the
+/// affected registers, and refresh LiveIntervals and the per-region live-ins.
+///
+/// \param RewriteCands The instructions to rewrite (the paired opcode is not
+///        used here).
+/// \returns false if a candidate's src2 or dst register is not virtual, true
+///          otherwise.
+///
+/// NOTE(review): the `return false` paths are taken after the current
+/// candidate's descriptor (and possibly earlier candidates) have already been
+/// changed; presumably non-virtual registers cannot occur here -- confirm.
+bool RewriteScheduleStage::rewrite(
+    std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
+  // Maps from the instruction at a region's begin / end iterator to the
+  // region index, used to keep DAG.Regions in sync when copies are inserted.
+  DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
+  DenseMap<MachineInstr *, unsigned> LastMIToRegion;
+
+  for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) {
+    auto Entry = DAG.Regions[Region];
+    if (Entry.first == Entry.second)
+      continue;
+
+    FirstMIToRegion[&*Entry.first] = Region;
+    if (Entry.second != Entry.first->getParent()->end())
+      LastMIToRegion[&*Entry.second] = Region;
+  }
+
+  // Rewrite the MFMAs to AGPR, and insert any copies as needed.
+  // The general assumption of the algorithm (and the previous cost calculation)
+  // is that it is better to insert the copies in the MBB of the def of the src2
+  // operands, and in the MBB of the user of the dest operands. This is based on
+  // the assumption that the MFMAs are likely to appear in loop bodies, while
+  // the src2 and dest operands are live-in / live-out of the loop. Due to this
+  // design, the algorithm for finding copy insertion points is more
+  // complicated.
+  //
+  // There are three main cases to handle: 1. the reaching defs of the src2
+  // operands, 2. the reaching uses of the dst operands, and 3. the reaching
+  // defs of the reaching uses of the dst operand.
+  //
+  // In the first case, we simply insert copies after each of the reaching
+  // definitions. In the second case, we collect all the uses of a given dest
+  // and organize them by MBB. Then, we insert 1 copy for each MBB before the
+  // earliest use. Since the use may have multiple reaching defs, and since we
+  // want to replace the register it is using with the result of the copy, we
+  // must handle case 3. In the third case, we simply insert a copy after each
+  // of the reaching defs to connect to the copy of the reaching uses of the dst
+  // reg. This allows us to avoid inserting copies next to the MFMAs.
+  //
+  // While inserting the copies, we maintain a map of operands which will use
+  // different regs (i.e. the result of the copies). For example, a case 1 src2
+  // operand will use the register result of the copies after the reaching defs,
+  // as opposed to the original register. Now that we have completed our copy
+  // analysis and placement, we can bulk update the registers. We do this
+  // separately as to avoid complicating the reachingDef and reachingUse
+  // queries.
+  //
+  // While inserting the copies, we also maintain a list of registers which we
+  // will want to reclassify as AGPR. After doing the copy insertion and the
+  // register replacement, we can finally do the reclassification. This uses the
+  // redef map, as the registers we are interested in reclassifying may be
+  // replaced by the result of a copy. We must do this after the copy analysis
+  // and placement as we must have an accurate redef map -- otherwise we may end
+  // up creating illegal instructions.
+
+  // The original registers of the MFMA that need to be reclassified as AGPR
+  std::set<Register> RewriteRegs;
+  // The map of an original register in the MFMA to a new register (result of a
+  // copy) that it should be replaced with.
+  DenseMap<Register, Register> RedefMap;
+  // The map of the original MFMA registers to the relevant MFMA operands.
+  DenseMap<Register, std::set<MachineOperand *>> ReplaceMap;
+  // The map of reaching defs for a given register -- to avoid duplicate copies.
+  DenseMap<Register, SmallPtrSet<MachineInstr *, 8>> ReachingDefCopyMap;
+  // The map of reaching uses for a given register by basic block -- to avoid
+  // duplicate copies and to calculate per MBB insert pts.
+  DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
+      ReachingUseTracker;
+
+  for (auto &RI : RewriteCands) {
+    MachineInstr &MI = *RI.first;
+
+    int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
+    if (ReplacementOp == -1)
+      continue;
+    MI.setDesc(TII->get(ReplacementOp));
+
+    // Case 1: insert copies for the reaching defs of the Src2Reg.
+    MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+
+    if (Src2->isReg()) {
+      Register Src2Reg = Src2->getReg();
+      if (!Src2Reg.isVirtual())
+        return false;
+
+      Register MappedReg = Src2->getReg();
+      SmallVector<SlotIndex, 8> Src2ReachingDefs;
+      findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
+      SmallVector<MachineInstr *, 8> Src2DefsReplace;
+
+      for (auto RDIndex : Src2ReachingDefs) {
+        MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);
+        if (TII->isMAI(*RD))
+          continue;
+
+        // If there is a non mai reaching def, then we need a copy.
+        if (find(Src2DefsReplace, RD) == Src2DefsReplace.end())
+          Src2DefsReplace.push_back(RD);
+      }
+
+      if (!Src2DefsReplace.empty()) {
+        if (RedefMap.contains(Src2Reg))
+          MappedReg = RedefMap[Src2Reg];
+        else {
+          assert(!ReachingDefCopyMap.contains(Src2Reg));
+          const TargetRegisterClass *Src2RC = DAG.MRI.getRegClass(Src2Reg);
+          const TargetRegisterClass *VGPRRC =
+              SRI->getEquivalentVGPRClass(Src2RC);
+
+          // Track the mapping of the original register to the new register.
+          MappedReg = DAG.MRI.createVirtualRegister(VGPRRC);
+          RedefMap[Src2Reg] = MappedReg;
+        }
+
+        // If none exists, create a copy from this reaching def.
+        // We may have inserted a copy already in an earlier iteration.
+        for (MachineInstr *RD : Src2DefsReplace) {
+          // Do not create redundant copies.
+          if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {
+            MachineInstrBuilder VGPRCopy =
+                BuildMIAfter(*RD->getParent(), RD->getIterator(),
+                             RD->getDebugLoc(), TII->get(TargetOpcode::COPY))
+                    .addDef(MappedReg, 0, 0)
+                    .addUse(Src2Reg, 0, 0);
+            DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
+
+            // If this reaching def was the last MI in the region, update the
+            // region boundaries.
+            if (LastMIToRegion.contains(RD)) {
+              unsigned UpdateRegion = LastMIToRegion[RD];
+              DAG.Regions[UpdateRegion].second = VGPRCopy;
+              LastMIToRegion.erase(RD);
+            }
+          }
+        }
+      }
+
+      // Track the register for reclassification
+      RewriteRegs.insert(Src2Reg);
+
+      // Always insert the operand for replacement. If this corresponds with a
+      // chain of tied-def we may not see the VGPR requirement until later.
+      ReplaceMap[Src2Reg].insert(Src2);
+    }
+
+    // Case 2 and Case 3: insert copies before the reaching uses of the dsts,
+    // and after the reaching defs of the reaching uses of the dsts.
+
+    MachineOperand *Dst = &MI.getOperand(0);
+    Register DstReg = Dst->getReg();
+    if (!DstReg.isVirtual())
+      return false;
+
+    Register MappedReg = DstReg;
+    SmallVector<MachineOperand *, 8> DstReachingUses;
+
+    SmallVector<MachineOperand *, 8> DstReachingUseCopies;
+    SmallVector<MachineInstr *, 8> DstUseDefsReplace;
+
+    findReachingUses(&MI, DAG.LIS, DstReachingUses);
+
+    for (MachineOperand *RUOp : DstReachingUses) {
+      if (TII->isMAI(*RUOp->getParent()))
+        continue;
+
+      // If there is a non mai reaching use, then we need a copy.
+      if (find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.end())
+        DstReachingUseCopies.push_back(RUOp);
+      SmallVector<SlotIndex, 8> DstUsesReachingDefs;
+      findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs);
+
+      for (auto RDIndex : DstUsesReachingDefs) {
+        MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);
+        if (TII->isMAI(*RD))
+          continue;
+
+        // If there is a non mai reaching def of this reaching use, then we will
+        // need a copy.
+        if (find(DstUseDefsReplace, RD) == DstUseDefsReplace.end())
+          DstUseDefsReplace.push_back(RD);
+      }
+    }
+
+    if (!DstUseDefsReplace.empty()) {
+      if (RedefMap.contains(DstReg))
+        MappedReg = RedefMap[DstReg];
+      else {
+        assert(!ReachingDefCopyMap.contains(DstReg));
+        const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg);
+        const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
+
+        // Track the mapping of the original register to the new register.
+        MappedReg = DAG.MRI.createVirtualRegister(VGPRRC);
+        RedefMap[DstReg] = MappedReg;
+      }
+
+      // If none exists, create a copy from this reaching def.
+      // We may have inserted a copy already in an earlier iteration.
+      for (MachineInstr *RD : DstUseDefsReplace) {
+        // Do not create redundant copies.
+        if (ReachingDefCopyMap[DstReg].insert(RD).second) {
+          MachineInstrBuilder VGPRCopy =
+              BuildMIAfter(*RD->getParent(), RD->getIterator(),
+                           RD->getDebugLoc(), TII->get(TargetOpcode::COPY))
+                  .addDef(MappedReg, 0, 0)
+                  .addUse(DstReg, 0, 0);
+          DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
+
+          // If this reaching def was the last MI in the region, update the
+          // region boundaries.
+          if (LastMIToRegion.contains(RD)) {
+            unsigned UpdateRegion = LastMIToRegion[RD];
+            DAG.Regions[UpdateRegion].second = VGPRCopy;
+            LastMIToRegion.erase(RD);
+          }
+        }
+      }
+    }
+
+    for (MachineOperand *RU : DstReachingUseCopies) {
+      MachineBasicBlock *RUBlock = RU->getParent()->getParent();
+      // Just keep track of the reaching use of this register by block. After we
+      // have scanned all the MFMAs we can find optimal insert pts.
+      if (RUBlock != MI.getParent()) {
+        ReachingUseTracker[RUBlock->getNumber()][DstReg].insert(RU);
+        continue;
+      }
+
+      // Special case, the use is in the same block as the MFMA. Insert the copy
+      // just before the use.
+      // NOTE(review): unlike the cross-block path below, this path does not
+      // adjust DAG.Regions when the use is the first MI of a region -- confirm
+      // that this is intended.
+      const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg);
+      const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
+      Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC);
+      MachineInstr *UseInst = RU->getParent();
+      MachineInstrBuilder VGPRCopy =
+          BuildMI(*UseInst->getParent(), UseInst->getIterator(),
+                  UseInst->getDebugLoc(), TII->get(TargetOpcode::COPY))
+              .addDef(NewUseReg, 0, 0)
+              .addUse(DstReg, 0, 0);
+      DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
+      // Since we know this use has only one reaching def, we can replace the
+      // use reg.
+      RU->setReg(NewUseReg);
+      // Track the copy source operand for replacement.
+      ReplaceMap[DstReg].insert(&VGPRCopy->getOperand(1));
+    }
+
+    // Track the register for reclassification
+    RewriteRegs.insert(DstReg);
+    // Insert the dst operand for replacement. If this dst is in a chain of
+    // tied-def MFMAs, and the first src2 needs to be replaced with a new reg,
+    // all the corresponding operands need to be replaced.
+    ReplaceMap[DstReg].insert(Dst);
+  }
+
+  // Handle the copies for dst uses.
+  for (const auto &RUBlockEntry : ReachingUseTracker) {
+    for (const auto &RUDst : RUBlockEntry.second) {
+      MachineOperand *OpBegin = *RUDst.second.begin();
+      SlotIndex InstPt = DAG.LIS->getInstructionIndex(*OpBegin->getParent());
+
+      // Find the earliest use in this block.
+      for (auto User : RUDst.second) {
+        SlotIndex NewInstPt = DAG.LIS->getInstructionIndex(*User->getParent());
+        if (SlotIndex::isEarlierInstr(NewInstPt, InstPt))
+          InstPt = NewInstPt;
+      }
+
+      const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(RUDst.first);
+      const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
+      Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC);
+      MachineInstr *UseInst = DAG.LIS->getInstructionFromIndex(InstPt);
+
+      MachineInstrBuilder VGPRCopy =
+          BuildMI(*UseInst->getParent(), UseInst->getIterator(),
+                  UseInst->getDebugLoc(), TII->get(TargetOpcode::COPY))
+              .addDef(NewUseReg, 0, 0)
+              .addUse(RUDst.first, 0, 0);
+      DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
+
+      // If this UseInst was the first MI in the region, update the region
+      // boundaries. The lookup, the index and the erase must all go through
+      // FirstMIToRegion: the copy was inserted in front of UseInst, so it is
+      // the region's begin that moves.
+      if (FirstMIToRegion.contains(UseInst)) {
+        unsigned UpdateRegion = FirstMIToRegion[UseInst];
+        DAG.Regions[UpdateRegion].first = VGPRCopy;
+        FirstMIToRegion.erase(UseInst);
+      }
+
+      // Replace the operand for all users.
+      for (auto User : RUDst.second) {
+        User->setReg(NewUseReg);
+      }
+
+      // Track the copy source operand for replacement.
+      ReplaceMap[RUDst.first].insert(&VGPRCopy->getOperand(1));
+    }
+  }
+
+  // We may have needed to insert copies after the reaching defs of the MFMAs.
+  // Replace the original register with the result of the copy for all relevant
+  // operands.
+  for (auto NewDef : RedefMap) {
+    Register OldReg = NewDef.first;
+    Register NewReg = NewDef.second;
+
+    // Replace the register for any associated operand in the MFMA chain.
+    for (MachineOperand *ReplaceOp : ReplaceMap[OldReg]) {
+      ReplaceOp->setReg(NewReg);
+    }
+  }
+
+  // Finally, do the reclassification of the MFMA registers.
+  for (auto RewriteReg : RewriteRegs) {
+    Register RegToRewrite = RewriteReg;
+
+    // Be sure to update the replacement register and not the original.
+    if (RedefMap.contains(RewriteReg))
+      RegToRewrite = RedefMap[RewriteReg];
+
+    const TargetRegisterClass *CurrRC = DAG.MRI.getRegClass(RegToRewrite);
+    const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC);
+
+    DAG.MRI.setRegClass(RegToRewrite, AGPRRC);
+  }
+
+  // Bulk update the LIS.
+  DAG.LIS->reanalyze(DAG.MF);
+  // Liveins may have been modified for cross RC copies
+  RegionPressureMap LiveInUpdater(&DAG, false);
+  LiveInUpdater.buildLiveRegMap();
+
+  for (unsigned Region = 0; Region < DAG.Regions.size(); Region++)
+    DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);
+
+  return true;
+}
+
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
const Function &F = MF.getFunction();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 95a931b9beb2a..e2d4f49b4ef16 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -16,6 +16,9 @@
#include "GCNRegPressure.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineScheduler.h"
@@ -28,11 +31,12 @@ class GCNSchedStage;
enum class GCNSchedStageID : unsigned {
OccInitialSchedule = 0,
- UnclusteredHighRPReschedule = 1,
- ClusteredLowOccupancyReschedule = 2,
- PreRARematerialize = 3,
- ILPInitialSchedule = 4,
- MemoryClauseInitialSchedule = 5
+ RewriteSchedule = 1,
+ UnclusteredHighRPReschedule = 2,
+ ClusteredLowOccupancyReschedule = 3,
+ PreRARematerialize = 4,
+ ILPInitialSchedule = 5,
+ MemoryClauseInitialSchedule = 6
};
#ifndef NDEBUG
@@ -239,6 +243,7 @@ using RegionBoundaries =
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
friend class GCNSchedStage;
friend class OccInitialScheduleStage;
+ friend class RewriteScheduleStage;
friend class UnclusteredHighRPStage;
friend class ClusteredLowOccStage;
friend class PreRARematStage;
@@ -413,6 +418,61 @@ class OccInitialScheduleStage : public GCNSchedStage {
: GCNSchedStage(StageID, DAG) {}
};
+/// Scheduling stage that rewrites MFMA instructions from their VGPR form to
+/// their AGPR form when the cost heuristic finds this beneficial. All of the
+/// work happens in initGCNSchedStage().
+class RewriteScheduleStage : public GCNSchedStage {
+private:
+  // Record regions with excess archvgpr register pressure over the physical
+  // register limit. Register pressure in these regions usually will result in
+  // spilling.
+  BitVector RegionsWithExcessArchVGPR;
+
+  MachineBranchProbabilityInfo MBPI;
+  MachineBlockFrequencyInfo MBFI;
+
+  // Assigned in initGCNSchedStage() once the stage decides to proceed; null
+  // until then.
+  const SIInstrInfo *TII = nullptr;
+  const SIRegisterInfo *SRI = nullptr;
+
+  /// Do a speculative rewrite and collect copy locations. The speculative
+  /// rewrite allows us to calculate the RP of the code after the rewrite, and
+  /// the copy locations allow us to calculate the total cost of copies required
+  /// for the rewrite. Stores the rewritten instructions in \p RewriteCands ,
+  /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
+  /// copy locations for defs (of the MFMA operands) in \p CopyForDef
+  bool
+  initHeuristics(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
+                 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
+                 SmallPtrSetImpl<MachineInstr *> &CopyForDef);
+
+  /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
+  /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
+  /// costs, and \p RewriteCands to undo rewriting.
+  int64_t
+  getRewriteCost(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
+                 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
+                 SmallPtrSetImpl<MachineInstr *> &CopyForDef);
+
+  /// Do the final rewrite on \p RewriteCands and insert any needed copies.
+  bool rewrite(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);
+
+  /// \returns true if this MI is a rewrite candidate.
+  bool isRewriteCandidate(MachineInstr *MI) const;
+
+  /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
+  /// DefIdxs. \returns the def index of the value that is live at the use.
+  SlotIndex findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
+                             SmallVectorImpl<SlotIndex> &DefIdxs);
+
+  /// Finds all the reaching uses of \p DefMI and stores the use operands in \p
+  /// ReachingUses
+  void findReachingUses(MachineInstr *DefMI, LiveIntervals *LIS,
+                        SmallVectorImpl<MachineOperand *> &ReachingUses);
+
+public:
+  bool initGCNSchedStage() override;
+
+  RewriteScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
+      : GCNSchedStage(StageID, DAG) {}
+};
+
class UnclusteredHighRPStage : public GCNSchedStage {
private:
// Save the initial occupancy before starting this stage.
diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir
new file mode 100644
index 0000000000000..73eeafb6bccc5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir
@@ -0,0 +1,5591 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s
+
+--- |
+ define void @src2_singledef_singleuse_dst_singleuse_singledef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_multiuse_dst_singleuse_singledef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_singleuse_dst_singleuse_singledef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_multiuse_dst_singleuse_singledef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_singleuse_dst_singleuse_multidef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_singleuse_dst_singleuse_multidef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_multiuse_dst_singleuse_multidef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_multiuse_dst_singleuse_multidef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_singleuse_dst_multiuse_singledef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_singleuse_dst_multiuse_singledef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_multiuse_dst_multiuse_singledef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_multiuse_dst_multiuse_singledef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_singleuse_dst_multiuse_multidef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_multiuse_dst_multiuse_multidef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_singleuse_dst_multiuse_multidef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_multiuse_dst_multiuse_multidef_vgpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_singleuse_dst_singleuse_singledef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_singleuse_dst_singleuse_singledef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_multiuse_dst_singleuse_singleedef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_multiuse_dst_singleuse_singledef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_singleuse_dst_singleuse_multidef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_singleuse_dst_singleuse_multidef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_multiuse_dst_singleuse_multidef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_multiuse_dst_singleuse_multidef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_singleuse_dst_multiuse_singledef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_singleuse_dst_multiuse_singledef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_multiuse_dst_multiuse_singledef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_multiuse_dst_multiuse_singledef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_singleuse_dst_multiuse_multidef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_singleuse_dst_multiuse_multidef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_multiuse_dst_multiuse_multidef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_multiuse_dst_multiuse_multidef_agpr() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_singleuse_dst_singleuse_singledef_mixed() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_singleuse_dst_multiuse_multidef_mixed() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_singledef_multiuse_dst_singleuse_multidef_mixed() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @src2_multidef_multiuse_dst_multiuse_multidef_mixed() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @no_copy_for_mfma() #0 {
+ entry:
+ unreachable
+ }
+
+ attributes #0 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"}
+...
+
+
+---
+name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+
+ bb.2:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.3:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+---
+name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+
+ bb.1:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.2:
+ %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+---
+name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+
+ bb.4:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.7:
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+---
+name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+
+ bb.4:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.7:
+ %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+
+---
+# src2 (%84) has a single definition in bb.0 and a single use, as src2 of the
+# first MFMA in bb.2. The MFMA results %85-%88 are each also defined in bb.0,
+# and %88 is read once more by the V_ADD_U32 in bb.4.
+# The CHECK lines expect every _vgprcd_e64 MFMA to become the _e64 form with an
+# areg_128_align2 result, a COPY to areg_128_align2 after each of those bb.0
+# definitions, and COPYs back to vreg_128_align2 ahead of the V_ADD_U32 and
+# KILL users.
+name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
+ ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.2:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.3:
+ KILL %89, %90, %91, %92, %93, %193
+
+ bb.4:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+---
+# src2 (%84) is defined once in bb.0 but used twice: as src2 of the first MFMA
+# in bb.2 and by the first V_ADD_U32 in bb.4. The CHECK lines expect that
+# V_ADD_U32 to keep reading the original VGPR value, while the MFMA chain reads
+# the areg_128_align2 COPY made in bb.0.
+# NOTE(review): the name says "multidef_singleuse", yet %84 here has one def and
+# two uses, and the next function (named "singledef_multiuse") has two defs and
+# one use; the two names look swapped - confirm before renaming.
+name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
+ ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.2:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.3:
+ KILL %89, %90, %91, %92, %93, %193
+
+ bb.4:
+ %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+---
+# src2 (%84) is defined in both bb.2 and bb.3, which both branch to bb.4, and
+# has a single use as src2 of the first MFMA in bb.5. The CHECK lines expect an
+# areg_128_align2 COPY right after each of the two definitions.
+# NOTE(review): the name says "singledef_multiuse", yet %84 here has two defs
+# and one use, and the previous function (named "multidef_singleuse") has one
+# def and two uses; the two names look swapped - confirm before renaming.
+name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
+ ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ KILL %89, %90, %91, %92, %93, %193
+
+ bb.7:
+ undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %95
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+# src2 of the first MFMA (%84) is written on both branch arms (bb.2, bb.3) and read by that MFMA and by a V_ADD in bb.7; the chain result %88 is also written by a V_ADD in bb.0 and read by one V_ADD in bb.7.
+# The CHECK lines expect every _vgprcd_e64 MFMA to be emitted as the _e64 form with an areg_128_align2 result, with COPYs into areg_128_align2 after the non-MFMA defs and COPYs back to vreg_128_align2 ahead of non-MFMA readers of MFMA results.
+---
+name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
+ ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ KILL %89, %90, %91, %92, %93, %193
+
+ bb.7:
+ %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+# src2 of the first MFMA (%84) has a single def in bb.0 and that MFMA is its only reader; the last MFMA result (%88) has one def and is read by a V_ADD on each branch arm (bb.3, bb.4).
+# The CHECK lines expect all four _vgprcd_e64 MFMAs to be emitted as the _e64 form with areg_128_align2 results, one COPY into areg_128_align2 after the V_ADD in bb.0, and COPYs back to vreg_128_align2 ahead of the non-MFMA readers.
+---
+name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+
+ bb.1:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.2:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.3:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.4:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+
+ bb.5:
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+# NOTE(review): src2 of the first MFMA (%84) has a single def in bb.0 but two readers (that MFMA in bb.1 and the V_ADD in bb.5), which is singledef/multiuse rather than the multidef/singleuse in the function name.
+# The following function, named src2_singledef_multiuse_..., writes its src2 on two branch arms instead, so the two names look swapped - confirm the intended naming.
+---
+name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+
+ bb.1:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.2:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.3:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.4:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+
+ bb.5:
+ %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+
+...
+
+---
+name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+
+ bb.4:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.5:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+
+ bb.6:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ S_BRANCH %bb.8
+
+ bb.7:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+
+ bb.8:
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+# src2 of the first MFMA (%84) has two defs (bb.2 and bb.3) and two uses (the
+# first MFMA in bb.4 and the V_ADD_U32 in bb.8). The last MFMA result (%88) has
+# a single def in bb.4 and is read in bb.6, bb.7 and bb.8.
+# The CHECK lines expect every MFMA to be rewritten from the _vgprcd_e64 form
+# (vreg_128_align2 dst) to the _e64 form (areg_128_align2 dst), with a COPY to
+# areg_128_align2 after each def of %84 and a COPY of each MFMA result back to
+# vreg_128_align2 in the blocks that read it.
+---
+name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+
+ bb.4:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.5:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+
+ bb.6:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ S_BRANCH %bb.8
+
+ bb.7:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+
+ bb.8:
+ %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+# src2 of the first MFMA (%84) has one def (bb.0) and one use (the first MFMA
+# in bb.1). The MFMA dst registers %85-%88 are each defined twice (once in bb.0
+# and once by an MFMA in bb.1); %88 is read in bb.4, bb.5 and bb.6. Twelve
+# further MFMAs (%89-%93 and %193-%199) all read %87 and are only consumed by
+# the KILL in bb.2.
+# The CHECK lines expect all MFMAs to be rewritten to the _e64 form with
+# areg_128_align2 dsts, a COPY to areg_128_align2 after each bb.0 def of
+# %84-%88, and COPYs back to vreg_128_align2 in the blocks that read the
+# results.
+# NOTE(review): bb.4 ends in S_BRANCH %bb.5, and bb.5 redefines %94.sub0, so
+# the bb.4 def is dead (the CHECK lines mark it `dead undef`). Confirm whether
+# S_BRANCH %bb.6 was intended.
+---
+name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6]]
+ ; CHECK-NEXT: KILL [[COPY10]], [[COPY5]], [[COPY12]], [[COPY7]], [[COPY14]], [[COPY9]], [[COPY16]], [[COPY11]], [[COPY6]], [[COPY13]], [[COPY8]], [[COPY15]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY18]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY19]], [[COPY21]], [[COPY20]], [[COPY22]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.1:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %194:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %195:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %196:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %197:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %198:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %199:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+
+ bb.2:
+ KILL %89, %90, %91, %92, %93, %193, %194, %195, %196, %197, %198, %199
+
+
+ bb.3:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+
+ bb.4:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.5:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+
+ bb.6:
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+---
+name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
+ ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY15]], [[COPY14]], [[COPY16]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.1:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.2:
+ KILL %89, %90, %91, %92, %93, %193
+
+ bb.3:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+
+ bb.4:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ S_BRANCH %bb.6
+
+ bb.5:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+
+ bb.6:
+ %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+---
+# Scenario for the MFMA VGPR-to-AGPR rewrite (register numbers refer to the
+# input body below):
+#  * src2: %84 is the third source operand of the first MFMA in bb.5. It has
+#    two reaching definitions (bb.2 and bb.3) and that MFMA is its only use.
+#  * dst: %88 is written by the fourth MFMA in bb.5, is also partially defined
+#    in bb.0, and is read by non-MFMA instructions in bb.8, bb.9 and bb.10.
+# The CHECK lines expect all ten MFMAs to change from the _vgprcd_e64 opcode to
+# the _e64 opcode with areg_128_align2 results, a COPY into an areg after each
+# non-MFMA definition of the rewritten registers, and a COPY back to a vreg
+# ahead of each non-MFMA use.
+# The input has no bb.1, so input bb.N (N >= 2) appears as bb.(N-1) in the
+# CHECK lines.
+# NOTE(review): the wide vreg_1024/av_512 values kept live until bb.10 are
+# presumably there to create register pressure - confirm against the RUN line.
+name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
+ ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.8, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.9(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: successors: %bb.9(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.9:
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY15]], [[COPY13]], [[COPY16]], [[COPY14]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ ; bb.2 and bb.3 each define %84.sub0, so %84 has two reaching definitions
+ ; when it is read in bb.5.
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+
+ ; MFMA chain: %84 is src2 of the first MFMA only; %85, %86 and %87 are
+ ; forwarded as src2 of the following MFMAs.
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ ; Non-MFMA use of the last six MFMA results.
+ bb.6:
+ KILL %89, %90, %91, %92, %93, %193
+
+
+ bb.7:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.9, implicit killed $scc
+
+ ; bb.8 and bb.9 read %88 with V_ADD_U32_e32; the CHECK lines expect a COPY
+ ; back to a vreg in each of these blocks first.
+ bb.8:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ S_BRANCH %bb.10
+
+ bb.9:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+
+ bb.10:
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+---
+# Scenario for the MFMA VGPR-to-AGPR rewrite (register numbers refer to the
+# input body below):
+#  * src2: %84 is the third source operand of the first MFMA in bb.5. It has
+#    two reaching definitions (bb.2 and bb.3) and two uses: that MFMA and the
+#    V_ADD_U32_e32 in bb.9.
+#  * dst: %88 is written by the fourth MFMA in bb.5, is also partially defined
+#    in bb.0, and is read by non-MFMA instructions in bb.7, bb.8 and bb.9.
+# The CHECK lines expect all four MFMAs to change from the _vgprcd_e64 opcode
+# to the _e64 opcode with areg_128_align2 results, a COPY into an areg after
+# each non-MFMA definition of the rewritten registers, and a COPY back to a
+# vreg ahead of each non-MFMA use of an MFMA result. The extra use of %84 in
+# bb.9 is expected to keep reading the original vreg V_ADD_U32_e32 result
+# rather than a copy.
+# The input has no bb.1, so input bb.N (N >= 2) appears as bb.(N-1) in the
+# CHECK lines.
+# NOTE(review): the wide vreg_1024/av_512 values kept live until bb.9 are
+# presumably there to create register pressure - confirm against the RUN line.
+name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ ; bb.2 and bb.3 each define %84.sub0, so %84 has two reaching definitions
+ ; when it is read in bb.5 and bb.9.
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+
+ ; MFMA chain: %84 is src2 of the first MFMA; %85, %86 and %87 are forwarded
+ ; as src2 of the following MFMAs.
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.8, implicit killed $scc
+
+ ; bb.7 and bb.8 read %88 with V_ADD_U32_e32; the CHECK lines expect a COPY
+ ; back to a vreg in each of these blocks first.
+ bb.7:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ S_BRANCH %bb.9
+
+ bb.8:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+
+ ; %104 is the second use of %84 (besides the first MFMA in bb.5).
+ bb.9:
+ %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+
+...
+
+---
+name: src2_singledef_singleuse_dst_singleuse_singledef_agpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY7]], 0, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.2:
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.3:
+ KILL %89, %90, %91, %92, %93, %193
+
+ bb.4:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+
+---
+name: src2_multidef_singleuse_dst_singleuse_singledef_agpr # %84 is loaded in bb.0, updated in place as MFMA src2/dst in bb.1, and read through .sub0 by a V_ADD_U32 in bb.3.
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[COPY7]].sub0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF ; NOTE(review): %1-%13, %50 and %51 are only consumed by the final KILL/S_NOP in bb.3; presumably they exist to raise register pressure - confirm
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec ; initial VGPR definition of the accumulator; the expected output copies it into an AGPR right after this load
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; bb.3 is reached either directly from here or by falling through bb.1 and bb.2
+
+ bb.1: ; four in-place updates of %84 (src2 and dst), then six MFMAs that only read %84 as src2
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.2: ; sole user of the six single-def MFMA results; the expected output copies each one back to a VGPR ahead of the KILL
+ KILL %89, %90, %91, %92, %93, %193
+
+
+ bb.3:
+ %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec ; non-MFMA reader of %84: the expected output feeds it a VGPR copy of the AGPR accumulator
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+---
+name: src2_singledef_multiuse_dst_singleuse_singleedef_agpr # %84 gets one DS_READ definition on each path into bb.4, where four MFMAs update it in place. NOTE(review): "singleedef" looks like a typo for "singledef"; a rename must also update the label line of the expected output.
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singleedef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 128, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF ; NOTE(review): %1-%13, %50 and %51 are only consumed by the final KILL/S_NOP in bb.7; presumably they exist to raise register pressure - confirm
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; selects which of the two loads below defines %84
+
+ bb.2:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec ; definition on the fall-through path (offset 0); the expected output adds an AGPR copy right after it
+ S_BRANCH %bb.4
+
+ bb.3:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec ; definition on the taken path (offset 128); the expected output adds an AGPR copy right after it
+
+ bb.4: ; join block: every MFMA reads %84 as src2 and writes its result back to %84
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.7: ; the KILL below is the only non-MFMA user of %84; the expected output hands it a VGPR copy of the AGPR value
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+---
+name: src2_multidef_multiuse_dst_singleuse_singledef_agpr # %84 gets one DS_READ definition on each path into bb.4 and seeds a chain of four MFMAs (%85-%88), each taking the previous result as src2.
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 128, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF ; NOTE(review): %1-%13, %50 and %51 are only consumed by the final KILL/S_NOP in bb.7; presumably they exist to raise register pressure - confirm
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; selects which of the two loads below defines %84
+
+ bb.2:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec ; definition on the fall-through path (offset 0); the expected output adds an AGPR copy right after it
+ S_BRANCH %bb.4
+
+ bb.3:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec ; definition on the taken path (offset 128); the expected output adds an AGPR copy right after it
+
+ bb.4: ; dependent chain through src2: %84 -> %85 -> %86 -> %87 -> %88
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.7:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec ; stores the last value of the chain; the expected output inserts a VGPR copy of it just before this store
+ SCHED_BARRIER 0 ; in the expected output the VGPR copies of %85-%87 appear after this barrier, ahead of the KILL
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+---
+name: src2_singledef_singleuse_dst_singleuse_multidef_agpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.3:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+--- # Input: src2 %84 is defined once (DS_READ in bb.0) and read by the first MFMA and by a DS_WRITE; dst %88 is defined by a DS_READ and by the last MFMA and stored once.
+name: src2_multidef_singleuse_dst_singleuse_multidef_agpr # NOTE(review): body looks like singledef/multiuse for src2, not multidef/singleuse; this name and src2_singledef_multiuse_dst_singleuse_multidef_agpr appear swapped - confirm.
+tracksRegLiveness: true # Expected: the four _vgprcd_e64 MFMAs become _e64 with areg_128_align2 results; %84 is copied to an AGPR for the MFMA while the DS_WRITE keeps reading the original VGPR value.
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 128, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec
+
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.3:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+--- # Input: src2 %84 is defined by a DS_READ in each of two predecessor blocks and read only by the first MFMA; dst %88 is defined by a DS_READ and by the last MFMA and stored once.
+name: src2_singledef_multiuse_dst_singleuse_multidef_agpr # NOTE(review): body looks like multidef/singleuse for src2, not singledef/multiuse; this name and src2_multidef_singleuse_dst_singleuse_multidef_agpr appear swapped - confirm.
+tracksRegLiveness: true # Expected: the four _vgprcd_e64 MFMAs become _e64 with areg_128_align2 results, and a COPY to an AGPR follows each of the two DS_READ defs of %84.
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 128, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+--- # Input: src2 %84 is defined by a DS_READ in each of two predecessor blocks and read by the first MFMA and by a DS_WRITE; dst %88 is defined by a DS_READ and by the last MFMA and stored once.
+name: src2_multidef_multiuse_dst_singleuse_multidef_agpr # Name encodes src2 {def,use} then dst {use,def} multiplicity, matching the body.
+tracksRegLiveness: true # Expected: the four _vgprcd_e64 MFMAs become _e64 with areg_128_align2 results; each DS_READ def of %84 gets a COPY to an AGPR, and the DS_WRITE of %84 keeps reading the VGPR value.
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 384, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 128, 0, implicit $exec
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 384, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+
+--- # Input: src2 %84 is defined once (DS_READ in bb.0) and read only by the first MFMA; dst %88 is defined only by the last MFMA and stored by a DS_WRITE in each of two successor blocks.
+name: src2_singledef_singleuse_dst_multiuse_singledef_agpr # Name encodes src2 {def,use} then dst {use,def} multiplicity, matching the body.
+tracksRegLiveness: true # Expected: the four _vgprcd_e64 MFMAs become _e64 with areg_128_align2 results; %84 is copied to an AGPR in bb.0, and each reader of an MFMA result gets its own COPY back to vreg_128_align2.
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY2]], 128, 0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+
+ bb.1:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.2:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.3:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.4:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec
+
+ bb.5:
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+---
+name: src2_multidef_singleuse_dst_multiuse_singledef_agpr # Rewrite test: %84 is src2 of the first MFMA in a 4-deep chain and is also stored from bb.5; %88 (chain result) is stored from bb.3 and bb.4. NOTE(review): "src2_multidef_singleuse" does not match the input below, where %84 has one def (bb.0) and two uses; the name looks swapped with the src2_singledef_multiuse_* test - confirm before renaming.
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 256, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 256, 0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+
+ bb.1:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.2:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.3:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.4:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec
+
+ bb.5:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+
+...
+
+
+---
+name: src2_singledef_multiuse_dst_multiuse_singledef_agpr # Rewrite test: %84 is src2 of the first MFMA in a 4-deep chain; %88 (chain result) is stored from bb.6 and bb.7. NOTE(review): "src2_singledef_multiuse" does not match the input below, where %84 has two defs (bb.2 and bb.3) and a single use; the name looks swapped with the src2_multidef_singleuse_* test - confirm before renaming.
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub1, 128, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub0, 128, 0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
+
+ bb.4:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.5:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+
+ bb.6:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec
+ S_BRANCH %bb.8
+
+ bb.7:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec
+
+ bb.8:
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+
+---
+name: src2_multidef_multiuse_dst_multiuse_singledef_agpr # Rewrite test: %84 (src2 of the first MFMA in a 4-deep chain) is defined in both bb.2 and bb.3 and is used by that MFMA and by the bb.8 store; %88 (chain result) has one def and is stored from bb.6 and bb.7.
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub1, 128, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub0, 128, 0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
+
+ bb.4:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.5:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+
+ bb.6:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec
+ S_BRANCH %bb.8
+
+ bb.7:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec
+
+ bb.8:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %84.sub0:vreg_128_align2, 256, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+---
+name: src2_singledef_singleuse_dst_multiuse_multidef_agpr # Rewrite test: %84 (src2 of the first MFMA in a 4-deep chain) has one def and one use; the chain results %85-%88 are each also defined in bb.0, and bb.0 can branch straight to bb.3, which stores %88 (bb.4 stores it as well).
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY6]], 256, 0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.1:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.2:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.3:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.4:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 256, 0, implicit $exec
+
+ bb.5:
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
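+# NOTE(review): in the function below, %84 (src2 of the first MFMA) has a single def (DS_READ_B128 in bb.0) and two uses (that MFMA and the DS_WRITE_B128 in bb.5), which reads as singledef/multiuse rather than the "multidef_singleuse" in its name -- confirm the intended name.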
+---
+name: src2_multidef_singleuse_dst_multiuse_multidef_agpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 256, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 256, 0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.1:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.2:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.3:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.4:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec
+
+ bb.5:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
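+# NOTE(review): in the function below, %84 (src2 of the first MFMA) is defined in both bb.2 and bb.3 and used only by that MFMA, which reads as multidef/singleuse rather than the "singledef_multiuse" in its name -- confirm the intended name.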
+---
+name: src2_singledef_multiuse_dst_multiuse_multidef_agpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 128, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 128, 0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.8, implicit killed $scc
+
+ bb.7:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec
+ S_BRANCH %bb.9
+
+ bb.8:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec
+
+ bb.9:
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+---
+name: src2_multidef_multiuse_dst_multiuse_multidef_agpr
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_agpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 128, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 128, 0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.8, implicit killed $scc
+
+ bb.7:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec
+ S_BRANCH %bb.9
+
+ bb.8:
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec
+ DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec
+
+ bb.9:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 256, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+
+...
+
+---
+name: src2_singledef_singleuse_dst_singleuse_singledef_mixed
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_mixed
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+
+ bb.2:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.3:
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+
+# src2_multidef_singleuse_dst_multiuse_multidef_mixed
+#
+# Input shape:
+#  * %84, the src2 operand of the first MFMA, is defined on two paths (a
+#    V_ADD_U32 subregister def in bb.2 and a DS_READ_B128 in bb.3) and is read
+#    only by that MFMA.
+#  * The MFMA chain %85 -> %86 -> %87 -> %88 sits in bb.5, which bb.4 can
+#    branch around, so each of those registers also keeps its bb.0 def
+#    (IMPLICIT_DEF for %85-%87, a V_ADD_U32 subregister def for %88).
+#  * %88 is read by non-MFMA instructions in bb.7 (DS_WRITE_B128), bb.8
+#    (V_ADD_U32) and bb.9 (KILL).
+#
+# Expected output: the MFMAs use the non-vgprcd opcode with areg_128_align2
+# results; every VGPR def feeding them is followed by a COPY into an AGPR, and
+# every non-MFMA reader is preceded by a COPY back to vreg_128_align2.
+---
+name: src2_multidef_singleuse_dst_multiuse_multidef_mixed
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_mixed
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY5]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_BRANCH %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: undef [[DEF21:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF21:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[DEF21]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ ; Non-MFMA defs of %85-%88; the MFMA chain in bb.5 defines the same registers again.
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ ; First def of %84 (src2 of the first MFMA): subregister write only.
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ ; Second def of %84, on the other path into bb.4.
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+
+ bb.5:
+ ; MFMA chain: each result is the src2 operand of the next MFMA.
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.8, implicit killed $scc
+
+ bb.7:
+ ; Non-MFMA reader of %88 on the first path; %94 gets a full def here.
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
+ %94:vreg_128_align2 = IMPLICIT_DEF
+ S_BRANCH %bb.9
+
+ bb.8:
+ ; Non-MFMA readers of %88 on the second path; %94 is built from two subregister defs.
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+
+ bb.9:
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+
+...
+
+# src2_singledef_multiuse_dst_singleuse_multidef_mixed
+#
+# Input shape:
+#  * %84, the src2 operand of the first MFMA, has a single def (a V_ADD_U32
+#    subregister write in bb.0) and two readers: that MFMA in bb.2 and a
+#    V_ADD_U32 in bb.4.
+#  * bb.0 can branch straight to bb.4, skipping the MFMAs in bb.2, so %85-%88
+#    each have a non-MFMA def in bb.0 in addition to their MFMA def in bb.2.
+#  * %87 also feeds six further MFMAs (%89-%93 and %193) whose results are read
+#    only by the KILL in bb.3.
+#  * %88 is stored by DS_WRITE_B128 in bb.4.
+#
+# Expected output: the MFMAs use the non-vgprcd opcode with areg_128_align2
+# results and read an AGPR COPY of %84, while the V_ADD_U32 reader of %84 keeps
+# using the VGPR directly; non-MFMA readers of MFMA results are preceded by a
+# COPY back to vreg_128_align2.
+---
+name: src2_singledef_multiuse_dst_singleuse_multidef_mixed
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_mixed
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]]
+ ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY11]], 0, 0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ ; Only def of %84 (src2 of the first MFMA); it is also read by the V_ADD_U32 in bb.4.
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ ; Non-MFMA defs of %85-%88; the MFMAs in bb.2 define the same registers again.
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.2:
+ ; %85 -> %86 -> %87 -> %88 form a chain through src2; %89-%93 and %193 all take %87 as src2.
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.3:
+ ; Sole reader of the six extra MFMA results.
+ KILL %89, %90, %91, %92, %93, %193
+
+ bb.4:
+ ; Reached from both bb.0 and bb.3: second reader of %84, plus the non-MFMA readers of %88.
+ %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+# src2_multidef_multiuse_dst_multiuse_multidef_mixed
+#
+# Input shape:
+#  * %84, the src2 operand of the first MFMA, is defined on two paths (a
+#    DS_READ_B128 in bb.2 and a V_ADD_U32 subregister write in bb.3) and has
+#    two readers: that MFMA in bb.4 and a V_ADD_U32 in bb.8.
+#  * The MFMA chain %85 -> %86 -> %87 -> %88 sits in bb.4, which both paths
+#    from bb.0 pass through.
+#  * %88 is read by non-MFMA instructions in bb.6 (V_ADD_U32), bb.7
+#    (DS_WRITE_B128) and bb.8 (KILL).
+#
+# Expected output: the MFMAs use the non-vgprcd opcode with areg_128_align2
+# results; each def of %84 is followed by a COPY into an AGPR while the
+# V_ADD_U32 reader of %84 keeps using the VGPR; every non-MFMA reader of an
+# MFMA result is preceded by a COPY back to vreg_128_align2.
+#
+# NOTE(review): unlike the two preceding *_multidef tests, bb.0 here has no
+# non-MFMA defs of %85-%88, so each MFMA result has exactly one def in this
+# input. Confirm that this matches the "dst ... multidef" part of the name.
+---
+name: src2_multidef_multiuse_dst_multiuse_multidef_mixed
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_mixed
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY2]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ ; First def of %84 (src2 of the first MFMA): full-register load.
+ %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ ; Second def of %84, on the other path: subregister write only.
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+
+ bb.4:
+ ; MFMA chain: each result is the src2 operand of the next MFMA.
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.5:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+
+ bb.6:
+ ; Non-MFMA readers of %88 on the first path; %94 is built from two subregister defs.
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ S_BRANCH %bb.8
+
+ bb.7:
+ ; Non-MFMA reader of %88 on the second path; %94 gets a full def here.
+ DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec
+ %94:vreg_128_align2 = IMPLICIT_DEF
+
+ bb.8:
+ ; Second reader of %84, after the MFMA chain.
+ %95:vgpr_32 = V_ADD_U32_e32 %84.sub0, %64, implicit $exec
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+---
+name: no_copy_for_mfma
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: no_copy_for_mfma
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: dead [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = COPY [[DEF21]]
+ ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = COPY [[DEF22]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY2]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ %88:vreg_128_align2 = IMPLICIT_DEF
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ %88:vreg_128_align2 = IMPLICIT_DEF
+ S_BRANCH %bb.4
+
+
+ bb.3:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+
+ bb.4:
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+
+ bb.5:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir
new file mode 100644
index 0000000000000..050e4bc5e941c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir
@@ -0,0 +1,524 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s
+
+--- |
+ define void @more_copies_than_spills() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @less_copies_than_spills() #0 {
+ entry:
+ unreachable
+ }
+
+ define void @low_pressure() {
+ entry:
+ unreachable
+ }
+
+ attributes #0 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"}
+...
+
+
+---
+name: more_copies_than_spills
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: more_copies_than_spills
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.9(0x40000000), %bb.8(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.9, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: successors: %bb.9(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_3:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.9:
+ ; CHECK-NEXT: successors: %bb.10(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_4:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF18]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub2, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.10:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF18]].sub0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.8, implicit killed $scc
+
+ bb.7:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec
+ S_BRANCH %bb.9
+
+ bb.8:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.10, implicit killed $scc
+
+ bb.9:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec
+
+ bb.10:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %85.sub0, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub2, %64, implicit $exec
+
+ bb.11:
+ %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %85.sub0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %94, %104
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+
+...
+
+
+---
+name: less_copies_than_spills
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: less_copies_than_spills
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %12
+ ; CHECK-NEXT: S_NOP 0, implicit-def %13
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
+ ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY4]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.8, implicit killed $scc
+
+ bb.7:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec
+ S_BRANCH %bb.9
+
+ bb.8:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec
+
+ bb.9:
+ %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %94, %104
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+...
+
+
+---
+name: low_pressure
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+ workItemIDX: { reg: '$vgpr0' }
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; CHECK-LABEL: name: low_pressure
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_NOP 0, implicit-def %5
+ ; CHECK-NEXT: S_NOP 0, implicit-def %6
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF8:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF9:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub1, [[DEF5]], implicit $exec
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub0, [[DEF5]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub1, [[DEF5]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF5]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub1, [[DEF5]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub0, [[DEF5]], implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub0, [[DEF5]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub1, [[DEF5]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF6]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
+ ; CHECK-NEXT: SCHED_BARRIER 0
+ ; CHECK-NEXT: KILL [[DEF4]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF12]], [[DEF6]], [[DEF7]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: S_NOP 0, implicit %5, implicit %6
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5
+ %8:vreg_512 = IMPLICIT_DEF
+ %10:vreg_64 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vreg_128 = IMPLICIT_DEF
+ %13:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit-def %50:av_512
+ S_NOP 0, implicit-def %51:av_512
+ SCHED_BARRIER 0
+ %60:av_128_align2 = IMPLICIT_DEF
+ %61:av_128_align2 = IMPLICIT_DEF
+ %62:vreg_128_align2 = IMPLICIT_DEF
+ %63:vreg_64_align2 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %72:vreg_128_align2 = IMPLICIT_DEF
+ %85:vreg_128_align2 = IMPLICIT_DEF
+ %86:vreg_128_align2 = IMPLICIT_DEF
+ %87:vreg_128_align2 = IMPLICIT_DEF
+ undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+
+ bb.2:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+
+ bb.5:
+ %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec
+
+ bb.6:
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.8, implicit killed $scc
+
+ bb.7:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec
+ S_BRANCH %bb.9
+
+ bb.8:
+ undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec
+ %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec
+
+ bb.9:
+ %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec
+ SCHED_BARRIER 0
+ KILL %8, %10, %11, %12, %13, %62, %72, %85, %94, %104
+ S_NOP 0, implicit %50, implicit %51
+ S_ENDPGM 0
+
+...
>From 5e28ba3b7637cd408bc3a2d1ecb0bd3c56b757d7 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 22 Aug 2025 16:14:19 -0700
Subject: [PATCH 2/8] Do not rewrite to AGPR if waves-per-eu >= 2
Change-Id: I4ab71a3c739a203399a201e47d6b37ceba723bf2
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 453e9d28f2a2e..54a46e4ef222e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1297,6 +1297,8 @@ void RewriteScheduleStage::findReachingUses(
bool RewriteScheduleStage::initGCNSchedStage() {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ if (!ST.hasGFX90AInsts() || MFI.getMinWavesPerEU() > 1)
+ return false;
RegionsWithExcessArchVGPR.resize(DAG.Regions.size());
RegionsWithExcessArchVGPR.reset();
@@ -1306,7 +1308,7 @@ bool RewriteScheduleStage::initGCNSchedStage() {
RegionsWithExcessArchVGPR[Region] = true;
}
- if (!ST.hasGFX90AInsts() || RegionsWithExcessArchVGPR.none())
+ if (RegionsWithExcessArchVGPR.none())
return false;
TII = ST.getInstrInfo();
>From abc85a6e75ffb7e71e99fae189386ae6a419e0ed Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 4 Sep 2025 16:22:01 -0700
Subject: [PATCH 3/8] Review comments
Change-Id: I99db02cea2777024b4948a55d6a298c384f40534
---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 3 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 130 ++++++++++----------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 22 ++--
3 files changed, 74 insertions(+), 81 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 7ca8ea7be09f0..d13d1ddd9c0eb 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -102,7 +102,8 @@ struct GCNRegPressure {
DynamicVGPRBlockSize));
}
- unsigned getVGPRSpills(const GCNSubtarget &ST, MachineFunction &MF) {
+ unsigned getVGPRSpills(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (!ST.hasGFX90AInsts())
return 0;
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 54a46e4ef222e..7f945f5c094ee 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1223,22 +1223,20 @@ bool GCNSchedStage::initGCNSchedStage() {
return true;
}
-SlotIndex
-RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
- LiveIntervals *LIS,
- SmallVectorImpl<SlotIndex> &DefIdxs) {
+void RewriteScheduleStage::findReachingDefs(
+ MachineOperand &UseMO, LiveIntervals *LIS,
+ SmallVectorImpl<SlotIndex> &DefIdxs) {
assert(UseMO.isReg());
MachineInstr *UseMI = UseMO.getParent();
LiveInterval &UseLI = LIS->getInterval(UseMO.getReg());
- auto VNInfo = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI));
+ VNInfo *VNI = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI));
- SlotIndex DefMBBStart =
- LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNInfo->def));
+ SlotIndex DefMBBStart = LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNI->def));
// If the def is in the block, then it must be the only reaching def.
- if (DefMBBStart != VNInfo->def) {
- DefIdxs.push_back(VNInfo->def);
- return VNInfo->def;
+ if (DefMBBStart != VNI->def) {
+ DefIdxs.push_back(VNI->def);
+ return;
}
SmallPtrSet<MachineBasicBlock *, 8> Visited;
@@ -1256,15 +1254,15 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
MachineBasicBlock *CurrMBB = Worklist.pop_back_val();
SlotIndex CurrMBBEnd = LIS->getMBBEndIdx(CurrMBB);
- auto VNInfo = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot());
+ VNInfo *VNI = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot());
- MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNInfo->def);
+ MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNI->def);
SlotIndex DefMBBStart = LIS->getMBBStartIdx(DefMBB);
// If there is a def in this block, then add it to the list. This is the
// reaching def of this path.
- if (DefMBBStart != VNInfo->def) {
- DefIdxs.push_back(VNInfo->def);
+ if (DefMBBStart != VNI->def) {
+ DefIdxs.push_back(VNI->def);
continue;
}
@@ -1273,8 +1271,6 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO,
Worklist.push_back(PredMBB);
}
}
-
- return VNInfo->def;
}
void RewriteScheduleStage::findReachingUses(
@@ -1288,9 +1284,9 @@ void RewriteScheduleStage::findReachingUses(
// If we find a use that contains this DefMI in its reachingDefs, then it is
// a reaching use.
- if (find_if(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
+ if (any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
return SlotIndex::isSameInstr(RDIdx, DefIdx);
- }) != ReachingDefIndexes.end())
+ }))
ReachingUses.push_back(&UseMO);
}
}
@@ -1966,27 +1962,29 @@ bool RewriteScheduleStage::initHeuristics(
// Prepare for the heuristics
for (auto &MBB : MF) {
for (auto &MI : MBB) {
- if (isRewriteCandidate(&MI)) {
- int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
- if (ReplacementOp == -1)
- continue;
+ if (!isRewriteCandidate(&MI))
+ continue;
- RewriteCands.push_back({&MI, MI.getOpcode()});
- MI.setDesc(TII->get(ReplacementOp));
+ int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
+ if (ReplacementOp == -1)
+ continue;
- MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- if (Src2->isReg()) {
- SmallVector<SlotIndex, 8> Src2ReachingDefs;
- findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
+ RewriteCands.push_back({&MI, MI.getOpcode()});
+ MI.setDesc(TII->get(ReplacementOp));
- // For any definition of the src2 register which is non-MFMA, we
- // insert a copy.
- for (SlotIndex RDIdx : Src2ReachingDefs) {
- MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx);
- if (!TII->isMAI(*RD))
- CopyForDef.insert(RD);
- }
+ MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ if (Src2->isReg()) {
+ SmallVector<SlotIndex, 8> Src2ReachingDefs;
+ findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
+
+ // For any definition of the src2 register which is non-MFMA, we
+ // insert a copy.
+ for (SlotIndex RDIdx : Src2ReachingDefs) {
+ MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx);
+ if (!TII->isMAI(*RD))
+ CopyForDef.insert(RD);
}
+ }
MachineOperand &Dst = MI.getOperand(0);
SmallVector<MachineOperand *, 8> DstReachingUses;
@@ -2024,7 +2022,6 @@ bool RewriteScheduleStage::initHeuristics(
DAG.MRI.setRegClass(Dst.getReg(), AGPRRC);
if (Src2->isReg())
DAG.MRI.setRegClass(Src2->getReg(), AGPRRC);
- }
}
}
@@ -2032,28 +2029,32 @@ bool RewriteScheduleStage::initHeuristics(
}
int64_t RewriteScheduleStage::getRewriteCost(
- std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
- DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
- SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
+ const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
+ const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
+ const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
+ MachineBranchProbabilityInfo MBPI;
+ MachineBlockFrequencyInfo MBFI;
+
MBFI.calculate(MF, MBPI, *DAG.MLI);
int64_t BestSpillCost = 0;
int64_t Cost = 0;
+ uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
+
for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) {
if (!RegionsWithExcessArchVGPR[Region])
continue;
- auto PressureBefore = DAG.Pressure[Region];
- unsigned SpillCostBefore = PressureBefore.getVGPRSpills(ST, MF);
+ GCNRegPressure &PressureBefore = DAG.Pressure[Region];
+ unsigned SpillCostBefore = PressureBefore.getVGPRSpills(MF);
// For the cases we care about (i.e. ArchVGPR usage is greater than the
// addressable limit), rewriting alone should bring pressure to manageable
// level. If we find any such region, then the rewrite is potentially
// beneficial.
- auto PressureAfter = DAG.getRealRegPressure(Region);
- unsigned SpillCostAfter = PressureAfter.getVGPRSpills(ST, MF);
+ GCNRegPressure PressureAfter = DAG.getRealRegPressure(Region);
+ unsigned SpillCostAfter = PressureAfter.getVGPRSpills(MF);
- uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
uint64_t BlockFreq =
MBFI.getBlockFreq(DAG.Regions[Region].first->getParent())
.getFrequency();
@@ -2090,8 +2091,6 @@ int64_t RewriteScheduleStage::getRewriteCost(
unsigned CopyCost = 0;
- uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency();
-
// For each CopyForDef, increase the cost by the register size while
// accounting for block frequency.
for (auto *DefMI : CopyForDef) {
@@ -2107,12 +2106,11 @@ int64_t RewriteScheduleStage::getRewriteCost(
}
// Account for CopyForUse copies in each block that the register is used.
- for (auto &UseEntry : CopyForUse) {
+ for (auto &[UseBlock, UseRegs] : CopyForUse) {
uint64_t UseFreq =
- EntryFreq ? MBFI.getBlockFreq(UseEntry.first).getFrequency() / EntryFreq
- : 1;
+ EntryFreq ? MBFI.getBlockFreq(UseBlock).getFrequency() / EntryFreq : 1;
- for (auto UseReg : UseEntry.second) {
+ for (auto UseReg : UseRegs) {
unsigned RegSize =
DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(UseReg));
unsigned NumRegs = std::max(RegSize / 32, (unsigned)1);
@@ -2124,9 +2122,7 @@ int64_t RewriteScheduleStage::getRewriteCost(
// Reset to the vgpr form. We must do rewriting after copy-insertion, as some
// defs of the register may require VGPR.
- for (auto RI : RewriteCands) {
- MachineInstr *MI = RI.first;
-
+ for (auto &[MI, OriginalOpcode] : RewriteCands) {
assert(TII->isMAI(*MI));
const TargetRegisterClass *AGPRRC =
DAG.MRI.getRegClass(MI->getOperand(0).getReg());
@@ -2135,18 +2131,17 @@ int64_t RewriteScheduleStage::getRewriteCost(
MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
assert(Src2);
- if (Src2->isReg()) {
+ if (Src2->isReg())
DAG.MRI.setRegClass(Src2->getReg(), VGPRRC);
- }
DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VGPRRC);
- MI->setDesc(TII->get(RI.second));
+ MI->setDesc(TII->get(OriginalOpcode));
}
return Cost;
}
bool RewriteScheduleStage::rewrite(
- std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
+ const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
DenseMap<MachineInstr *, unsigned> LastMIToRegion;
@@ -2180,7 +2175,7 @@ bool RewriteScheduleStage::rewrite(
// want to replace the register it is using with the result of the copy, we
// must handle case 3. In the third case, we simply insert a copy after each
// of the reaching defs to connect to the copy of the reaching uses of the dst
- // reg. This allows us to avoid inserting copies next to the' MFMAs.
+ // reg. This allows us to avoid inserting copies next to the MFMAs.
//
// While inserting the copies, we maintain a map of operands which will use
// different regs (i.e. the result of the copies). For example, a case 1 src2
@@ -2191,14 +2186,14 @@ bool RewriteScheduleStage::rewrite(
// queries.
//
// While inserting the copies, we also maintain a list or registers which we
- // will want to reclassify as AGPR. After doing the copy isnertion and the
+ // will want to reclassify as AGPR. After doing the copy insertion and the
// register replacement, we can finally do the reclassification. This uses the
// redef map, as the registers we are interested in reclassifying may be
// replaced by the result of a copy. We must do this after the copy analysis
// and placement as we must have an accurate redef map -- otherwise we may end
// up creating illegal instructions.
- // The original registers of the MFMA that need to be reclassified as AGPR
+ // The original registers of the MFMA that need to be reclassified as AGPR.
std::set<Register> RewriteRegs;
// The map of an original register in the MFMA to a new register (result of a
// copy) that it should be replaced with.
@@ -2212,16 +2207,15 @@ bool RewriteScheduleStage::rewrite(
DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
ReachingUseTracker;
- for (auto &RI : RewriteCands) {
- MachineInstr &MI = *RI.first;
+ for (auto &[MI, OriginalOpcode] : RewriteCands) {
- int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
+ int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
if (ReplacementOp == -1)
continue;
- MI.setDesc(TII->get(ReplacementOp));
+ MI->setDesc(TII->get(ReplacementOp));
// Case 1: insert copies for the reaching defs of the Src2Reg.
- MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
if (Src2->isReg()) {
Register Src2Reg = Src2->getReg();
@@ -2291,7 +2285,7 @@ bool RewriteScheduleStage::rewrite(
// Case 2 and Case 3: insert copies before the reaching uses of the dsts,
// and after the reaching defs of the reaching uses of the dsts.
- MachineOperand *Dst = &MI.getOperand(0);
+ MachineOperand *Dst = &MI->getOperand(0);
Register DstReg = Dst->getReg();
if (!DstReg.isVirtual())
return false;
@@ -2302,7 +2296,7 @@ bool RewriteScheduleStage::rewrite(
SmallVector<MachineOperand *, 8> DstReachingUseCopies;
SmallVector<MachineInstr *, 8> DstUseDefsReplace;
- findReachingUses(&MI, DAG.LIS, DstReachingUses);
+ findReachingUses(MI, DAG.LIS, DstReachingUses);
for (MachineOperand *RUOp : DstReachingUses) {
if (TII->isMAI(*RUOp->getParent()))
@@ -2366,7 +2360,7 @@ bool RewriteScheduleStage::rewrite(
MachineBasicBlock *RUBlock = RU->getParent()->getParent();
// Just keep track of the reaching use of this register by block. After we
// have scanned all the MFMAs we can find optimal insert pts.
- if (RUBlock != MI.getParent()) {
+ if (RUBlock != MI->getParent()) {
ReachingUseTracker[RUBlock->getNumber()][DstReg].insert(RU);
continue;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index e2d4f49b4ef16..f7a9f79bf7364 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -425,9 +425,6 @@ class RewriteScheduleStage : public GCNSchedStage {
// spilling.
BitVector RegionsWithExcessArchVGPR;
- MachineBranchProbabilityInfo MBPI;
- MachineBlockFrequencyInfo MBFI;
-
const SIInstrInfo *TII;
const SIRegisterInfo *SRI;
@@ -443,23 +440,24 @@ class RewriteScheduleStage : public GCNSchedStage {
SmallPtrSetImpl<MachineInstr *> &CopyForDef);
/// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
- /// in initHueristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
+ /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
/// costs, and \p RewriteCands to undo rewriting.
- int64_t
- getRewriteCost(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
- DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
- SmallPtrSetImpl<MachineInstr *> &CopyForDef);
+ int64_t getRewriteCost(
+ const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
+ const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
+ const SmallPtrSetImpl<MachineInstr *> &CopyForDef);
/// Do the final rewrite on \p RewriteCands and insert any needed copies.
- bool rewrite(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);
+ bool
+ rewrite(const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);
/// \returns true if this MI is a rewrite candidate.
bool isRewriteCandidate(MachineInstr *MI) const;
/// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
- /// DefIdx
- SlotIndex findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
- SmallVectorImpl<SlotIndex> &DefIdxs);
+ /// DefIdxs
+ void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
+ SmallVectorImpl<SlotIndex> &DefIdxs);
/// Finds all the reaching uses of \p DefMI and stores the use operands in \p
/// ReachingUses
>From c1971ce327d0c5038cfbb8ad1f0b0023ab7ef26a Mon Sep 17 00:00:00 2001
From: Tony Linthicum <tlinthic at gmail.com>
Date: Thu, 20 Nov 2025 09:30:11 -0600
Subject: [PATCH 4/8] merge of PR 149367
---
llvm/include/llvm/CodeGen/MachineInstrBuilder.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index dfa35461e52c1..8eef1a4571c30 100644
--- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -470,8 +470,7 @@ inline MachineInstrBuilder BuildMIAfter(MachineBasicBlock &BB,
MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL());
BB.insertAfter(I, MI);
return MachineInstrBuilder(MF, MI)
- .setPCSections(MIMD.getPCSections())
- .setMMRAMetadata(MIMD.getMMRAMetadata());
+ .copyMIMetadata(MIMD);
}
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
>From 08e76dcfdc81954eea3fc04d3351a0e08d278424 Mon Sep 17 00:00:00 2001
From: Tony Linthicum <tlinthic at gmail.com>
Date: Tue, 25 Nov 2025 08:20:48 -0600
Subject: [PATCH 5/8] Address PR 149367 review comments
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 18 ++++++++----------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 2 +-
2 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 7f945f5c094ee..f1d79820ae232 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1245,7 +1245,7 @@ void RewriteScheduleStage::findReachingDefs(
Visited.insert(UseMI->getParent());
// Mark the predecessor blocks for traversal
- for (auto PredMBB : UseMI->getParent()->predecessors()) {
+ for (auto *PredMBB : UseMI->getParent()->predecessors()) {
Worklist.push_back(PredMBB);
Visited.insert(PredMBB);
}
@@ -1266,7 +1266,7 @@ void RewriteScheduleStage::findReachingDefs(
continue;
}
- for (auto PredMBB : DefMBB->predecessors()) {
+ for (auto *PredMBB : DefMBB->predecessors()) {
if (Visited.insert(PredMBB).second)
Worklist.push_back(PredMBB);
}
@@ -1966,8 +1966,7 @@ bool RewriteScheduleStage::initHeuristics(
continue;
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
- if (ReplacementOp == -1)
- continue;
+ assert(ReplacementOp != -1)
RewriteCands.push_back({&MI, MI.getOpcode()});
MI.setDesc(TII->get(ReplacementOp));
@@ -2238,9 +2237,9 @@ bool RewriteScheduleStage::rewrite(
}
if (!Src2DefsReplace.empty()) {
- if (RedefMap.contains(Src2Reg))
+ if (RedefMap.contains(Src2Reg)) {
MappedReg = RedefMap[Src2Reg];
- else {
+ } else {
assert(!ReachingDefCopyMap.contains(Src2Reg));
const TargetRegisterClass *Src2RC = DAG.MRI.getRegClass(Src2Reg);
const TargetRegisterClass *VGPRRC =
@@ -2399,7 +2398,7 @@ bool RewriteScheduleStage::rewrite(
SlotIndex InstPt = DAG.LIS->getInstructionIndex(*OpBegin->getParent());
// Find the earliest use in this block.
- for (auto User : RUDst.second) {
+ for (auto *User : RUDst.second) {
SlotIndex NewInstPt = DAG.LIS->getInstructionIndex(*User->getParent());
if (SlotIndex::isEarlierInstr(NewInstPt, InstPt))
InstPt = NewInstPt;
@@ -2426,7 +2425,7 @@ bool RewriteScheduleStage::rewrite(
}
// Replace the operand for all users.
- for (auto User : RUDst.second) {
+ for (auto *User : RUDst.second) {
User->setReg(NewUseReg);
}
@@ -2443,9 +2442,8 @@ bool RewriteScheduleStage::rewrite(
Register NewReg = NewDef.second;
// Replace the register for any associated operand in the MFMA chain.
- for (MachineOperand *ReplaceOp : ReplaceMap[OldReg]) {
+ for (MachineOperand *ReplaceOp : ReplaceMap[OldReg])
ReplaceOp->setReg(NewReg);
- }
}
// Finally, do the reclassification of the MFMA registers.
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index f7a9f79bf7364..76dba27d6f83f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -429,7 +429,7 @@ class RewriteScheduleStage : public GCNSchedStage {
const SIRegisterInfo *SRI;
/// Do a speculative rewrite and collect copy locations. The speculative
- /// rewrite allows us to calulcate the RP of the code after the rewrite, and
+ /// rewrite allows us to calculate the RP of the code after the rewrite, and
/// the copy locations allow us to calculate the total cost of copies required
/// for the rewrite. Stores the rewritten instructions in \p RewriteCands ,
/// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
>From 4b8764d391b1ef798af2d19b1a5014e97e94d50b Mon Sep 17 00:00:00 2001
From: Tony Linthicum <tlinthic at gmail.com>
Date: Mon, 1 Dec 2025 11:09:22 -0600
Subject: [PATCH 6/8] Address PR 149367 review comments
---
.../llvm/CodeGen/MachineInstrBuilder.h | 23 ++++++++-----------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 20 ++++++++--------
2 files changed, 20 insertions(+), 23 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index 8eef1a4571c30..8c16b06bce458 100644
--- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -375,6 +375,15 @@ class MachineInstrBuilder {
return *this;
}
+ /// Inserts the instruction being built immediately after \p MInstr, in the
+ /// MachineBasicBlock that contains \p MInstr.
+ const MachineInstrBuilder &insertAfter(MachineInstr *MInstr) const {
+ MachineBasicBlock *MBB = MInstr->getParent();
+ MachineBasicBlock::iterator I = MInstr->getIterator();
+ MBB->insertAfter(I, MI);
+ return *this;
+ }
+
bool constrainAllUses(const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) const {
@@ -459,20 +468,6 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD);
}
-/// This version of the builder inserts the newly-built instruction after the
-/// given position in the given MachineBasicBlock, and does NOT take a
-/// destination register.
-inline MachineInstrBuilder BuildMIAfter(MachineBasicBlock &BB,
- MachineBasicBlock::iterator I,
- const MIMetadata &MIMD,
- const MCInstrDesc &MCID) {
- MachineFunction &MF = *BB.getParent();
- MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL());
- BB.insertAfter(I, MI);
- return MachineInstrBuilder(MF, MI)
- .copyMIMetadata(MIMD);
-}
-
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::instr_iterator I,
const MIMetadata &MIMD,
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index f1d79820ae232..0773789c0ace2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1966,7 +1966,7 @@ bool RewriteScheduleStage::initHeuristics(
continue;
int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
- assert(ReplacementOp != -1)
+ assert(ReplacementOp != -1);
RewriteCands.push_back({&MI, MI.getOpcode()});
MI.setDesc(TII->get(ReplacementOp));
@@ -2256,10 +2256,10 @@ bool RewriteScheduleStage::rewrite(
// Do not create redundant copies.
if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {
MachineInstrBuilder VGPRCopy =
- BuildMIAfter(*RD->getParent(), RD->getIterator(),
- RD->getDebugLoc(), TII->get(TargetOpcode::COPY))
+ BuildMI(DAG.MF, RD->getDebugLoc(), TII->get(TargetOpcode::COPY))
.addDef(MappedReg, 0, 0)
- .addUse(Src2Reg, 0, 0);
+ .addUse(Src2Reg, 0, 0)
+ .insertAfter(RD);
DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
// If this reaching def was the last MI in the region, update the
@@ -2338,10 +2338,10 @@ bool RewriteScheduleStage::rewrite(
// Do not create reundant copies.
if (ReachingDefCopyMap[DstReg].insert(RD).second) {
MachineInstrBuilder VGPRCopy =
- BuildMIAfter(*RD->getParent(), RD->getIterator(),
- RD->getDebugLoc(), TII->get(TargetOpcode::COPY))
+ BuildMI(DAG.MF, RD->getDebugLoc(), TII->get(TargetOpcode::COPY))
.addDef(MappedReg, 0, 0)
- .addUse(DstReg, 0, 0);
+ .addUse(DstReg, 0, 0)
+ .insertAfter(RD);
DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
// If this reaching def was the last MI in the region, update the
@@ -2418,10 +2418,10 @@ bool RewriteScheduleStage::rewrite(
// If this UseInst was the first MI in the region, update the region
// boundaries.
- if (LastMIToRegion.contains(UseInst)) {
+ if (FirstMIToRegion.contains(UseInst)) {
unsigned UpdateRegion = FirstMIToRegion[UseInst];
DAG.Regions[UpdateRegion].first = VGPRCopy;
- LastMIToRegion.erase(UseInst);
+ FirstMIToRegion.erase(UseInst);
}
// Replace the operand for all users.
@@ -2469,6 +2469,8 @@ bool RewriteScheduleStage::rewrite(
for (unsigned Region = 0; Region < DAG.Regions.size(); Region++)
DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);
+ DAG.Pressure[RegionIdx] = DAG.getRealRegPressure(RegionIdx);
+
return true;
}
>From c7357edab0783579ce8146f4e5c004127d157afe Mon Sep 17 00:00:00 2001
From: Tony Linthicum <tlinthic at gmail.com>
Date: Tue, 2 Dec 2025 10:03:06 -0600
Subject: [PATCH 7/8] [AMDGPU] Add scheduling stage to rewrite MFMA from VGPR
to AGPR
---
.../AMDGPU/sched_mfma_rewrite_copies.mir | 2082 ++++++++---------
.../AMDGPU/sched_mfma_rewrite_cost.mir | 114 +-
2 files changed, 1098 insertions(+), 1098 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir
index 73eeafb6bccc5..f485b088c8034 100644
--- a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir
@@ -215,42 +215,42 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -316,42 +316,42 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -418,46 +418,42 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: SCHED_BARRIER 0
@@ -465,7 +461,11 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -538,55 +538,55 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -662,32 +662,28 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
@@ -695,16 +691,16 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
@@ -719,12 +715,16 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -805,32 +805,28 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
@@ -838,16 +834,16 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
@@ -861,14 +857,18 @@ body: |
; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -951,30 +951,26 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
@@ -982,14 +978,14 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -1002,16 +998,16 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
@@ -1026,12 +1022,16 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -1124,30 +1124,26 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
@@ -1155,14 +1151,14 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -1175,16 +1171,16 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
@@ -1198,14 +1194,18 @@ body: |
; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -1299,33 +1299,29 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
@@ -1337,21 +1333,25 @@ body: |
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -1427,33 +1427,29 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
@@ -1465,25 +1461,29 @@ body: |
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -1563,46 +1563,42 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
@@ -1614,23 +1610,27 @@ body: |
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -1716,46 +1716,42 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
@@ -1767,25 +1763,29 @@ body: |
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -1871,33 +1871,28 @@ body: |
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
@@ -1905,22 +1900,22 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY2]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
@@ -1949,21 +1944,26 @@ body: |
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
- ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY17]].sub1, [[DEF11]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY18]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY18]].sub0, [[DEF11]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY20:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
; CHECK-NEXT: [[COPY21:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
; CHECK-NEXT: [[COPY22:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY19]], [[COPY21]], [[COPY20]], [[COPY22]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[COPY19]], [[COPY21]], [[COPY20]], [[COPY22]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -2061,32 +2061,28 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
@@ -2094,16 +2090,16 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
@@ -2126,25 +2122,29 @@ body: |
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.6
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
- ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY15]], [[COPY14]], [[COPY16]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY13]], [[COPY15]], [[COPY14]], [[COPY16]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -2238,30 +2238,26 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
@@ -2269,14 +2265,14 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -2289,16 +2285,16 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
@@ -2321,23 +2317,27 @@ body: |
; CHECK-NEXT: successors: %bb.9(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: successors: %bb.9(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY15]], [[COPY13]], [[COPY16]], [[COPY14]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY15]], [[COPY13]], [[COPY16]], [[COPY14]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -2441,30 +2441,26 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
@@ -2472,14 +2468,14 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -2492,10 +2488,10 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
@@ -2507,25 +2503,29 @@ body: |
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub1, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub1, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
- ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -2622,41 +2622,36 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
@@ -2671,9 +2666,14 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY7]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY7]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY7]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -2752,41 +2752,37 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
@@ -2801,9 +2797,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
- ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[COPY7]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[COPY7]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[V_ADD_U32_e32_]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY7]], [[V_ADD_U32_e32_]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -2882,51 +2882,51 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 128, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -3000,55 +3000,55 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 128, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -3122,52 +3122,52 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 128, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -3240,53 +3240,53 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 128, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 128, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -3360,45 +3360,40 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -3411,19 +3406,24 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 128, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -3506,45 +3506,40 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -3557,20 +3552,25 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 384, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 384, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -3656,33 +3656,28 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
@@ -3694,21 +3689,26 @@ body: |
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY1]], 0, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY2]], 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY2]], 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -3783,33 +3783,28 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
@@ -3821,25 +3816,30 @@ body: |
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 256, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY1]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY1]].sub1, 256, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 256, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY2]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY2]].sub0, 256, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -3920,46 +3920,41 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
@@ -3971,23 +3966,28 @@ body: |
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub0, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub1, 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 128, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub1, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub0, 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -4074,46 +4074,41 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
@@ -4125,25 +4120,30 @@ body: |
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub0, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub1, 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 128, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub1, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub0, 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -4230,43 +4230,38 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
@@ -4278,21 +4273,26 @@ body: |
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 0, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY6]], 256, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY6]], 256, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -4375,43 +4375,38 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
@@ -4423,25 +4418,30 @@ body: |
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 256, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub1, 256, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 256, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub0, 256, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -4528,45 +4528,40 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -4579,10 +4574,10 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
@@ -4594,23 +4589,28 @@ body: |
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub1, 128, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub0, 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -4705,45 +4705,40 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -4756,10 +4751,10 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
@@ -4771,25 +4766,30 @@ body: |
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub0, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub1, 128, 0, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec
- ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 128, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub1, 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub0, 128, 0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -4886,42 +4886,42 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY1]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -4989,30 +4989,26 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
@@ -5020,14 +5016,14 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -5040,10 +5036,10 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
@@ -5055,16 +5051,16 @@ body: |
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY5]], 0, 0, implicit $exec
- ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: S_BRANCH %bb.8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
- ; CHECK-NEXT: undef [[DEF21:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[DEF21:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[DEF17:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: SCHED_BARRIER 0
@@ -5072,7 +5068,11 @@ body: |
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[DEF21]]
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF19]], [[DEF20]], [[DEF21]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF18]], [[DEF13]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[DEF17]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -5168,32 +5168,28 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]]
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
@@ -5201,16 +5197,16 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
@@ -5224,14 +5220,18 @@ body: |
; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec
; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY11]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY11]], 0, 0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -5314,46 +5314,42 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
@@ -5365,24 +5361,28 @@ body: |
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY2]], 0, 0, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY2]], 0, 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
- ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -5468,68 +5468,68 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]]
- ; CHECK-NEXT: dead [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]]
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]]
- ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = COPY [[DEF21]]
- ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = COPY [[DEF22]]
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = COPY [[DEF17]]
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY2]], [[V_ADD_U32_e32_1]]
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF20]], [[DEF21]], [[DEF22]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF19]], [[DEF13]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY2]], [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir
index 050e4bc5e941c..2982c99c3fa7b 100644
--- a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir
@@ -45,40 +45,33 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
@@ -90,7 +83,10 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
@@ -101,8 +97,8 @@ body: |
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF16]], implicit $exec
- ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF9]], implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF9]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
@@ -114,19 +110,23 @@ body: |
; CHECK-NEXT: bb.8:
; CHECK-NEXT: successors: %bb.9(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_3:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF9]], implicit $exec
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_3:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF9]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.10(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_4:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF18]].sub0, implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub2, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_4:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF11]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub2, [[DEF9]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.10:
- ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF18]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF11]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF10]], [[DEF11]], [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
@@ -229,42 +229,35 @@ body: |
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def %12
; CHECK-NEXT: S_NOP 0, implicit-def %13
- ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
; CHECK-NEXT: SCHED_BARRIER 0
- ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]]
- ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: dead [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
- ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF11]]
+ ; CHECK-NEXT: dead [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec
; CHECK-NEXT: $scc = IMPLICIT_DEF
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec
; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]]
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
@@ -277,7 +270,10 @@ body: |
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF15:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF14]], [[DEF15]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
@@ -289,22 +285,26 @@ body: |
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF9]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF9]], implicit $exec
; CHECK-NEXT: S_BRANCH %bb.8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
- ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub0, [[DEF16]], implicit $exec
- ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub1, [[DEF16]], implicit $exec
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub0, [[DEF9]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub1, [[DEF9]], implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
- ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF10]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec
; CHECK-NEXT: SCHED_BARRIER 0
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]]
- ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY4]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
+ ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF10]], [[COPY4]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]]
; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13
; CHECK-NEXT: S_ENDPGM 0
bb.0:
>From e7f7510cf0ea81bb6bd92d22fb58ed4c26135707 Mon Sep 17 00:00:00 2001
From: Tony Linthicum <tlinthic at gmail.com>
Date: Tue, 2 Dec 2025 12:59:08 -0600
Subject: [PATCH 8/8] Remove unnecessary second hasGFX90AInsts conditional
check
---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index d13d1ddd9c0eb..f04788c5d5715 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -120,15 +120,11 @@ struct GCNRegPressure {
unsigned AGPRSpill =
AGPRPressure > AGPRThreshold ? (AGPRPressure - AGPRThreshold) : 0;
- unsigned UnifiedSpill = 0;
-
- if (ST.hasGFX90AInsts()) {
- unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF);
- unsigned UnifiedPressure = getVGPRNum(true);
- UnifiedSpill = UnifiedPressure > CombinedThreshold
- ? (UnifiedPressure - CombinedThreshold)
- : 0;
- }
+ unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF);
+ unsigned UnifiedPressure = getVGPRNum(true);
+ unsigned UnifiedSpill = UnifiedPressure > CombinedThreshold
+ ? (UnifiedPressure - CombinedThreshold)
+ : 0;
return std::max(UnifiedSpill, (ArchSpill + AGPRSpill));
}
More information about the llvm-commits
mailing list