[llvm] [AMDGPU] Refine operand iterators in the SIInsertWaitcnts. NFCI. (PR #108884)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 02:02:49 PDT 2024
https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/108884
From 019b713a7003ce9a2e29564c4cc30da0f3de2eec Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Mon, 16 Sep 2024 11:35:52 -0700
Subject: [PATCH 1/2] [AMDGPU] Refine operand iterators in the
SIInsertWaitcnts. NFCI.
---
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 202 ++++++++------------
1 file changed, 85 insertions(+), 117 deletions(-)
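The core of the change: helpers such as getRegInterval() and setExpScore() previously took an operand index (unsigned OpNo) and re-fetched the operand internally, so callers had to write manual index loops to find the operands of interest. The patch passes the MachineOperand itself and iterates operand ranges directly. A minimal before/after sketch of the pattern, condensed from the diff below (signatures abbreviated, not the exact code):

    // Before: index-based; the callee re-resolves the operand from OpNo.
    RegInterval getRegInterval(const MachineInstr *MI, ..., unsigned OpNo) const;

    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
      const MachineOperand &Op = MI.getOperand(I);
      if (Op.isReg() && !Op.isDef())
        Interval = getRegInterval(&MI, MRI, TRI, I);
    }

    // After: operand-based; callers use MachineInstr's range iterators.
    RegInterval getRegInterval(const MachineInstr *MI, ...,
                               const MachineOperand &Op) const;

    for (const MachineOperand &Op : MI.uses()) {
      if (Op.isReg())
        Interval = getRegInterval(&MI, MRI, TRI, Op);
    }

This also lets the named-operand call sites collapse from a hasNamedOperand()/getNamedOperandIdx() pair into a single TII->getNamedOperand() lookup.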
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index b855b6b3107029..6906784cdafb24 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -304,7 +304,8 @@ class WaitcntBrackets {
RegInterval getRegInterval(const MachineInstr *MI,
const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI, unsigned OpNo) const;
+ const SIRegisterInfo *TRI,
+ const MachineOperand &Op) const;
bool counterOutOfOrder(InstCounterType T) const;
void simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
@@ -405,9 +406,9 @@ class WaitcntBrackets {
}
}
- void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
- const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
- unsigned OpNo, unsigned Val);
+ void setExpScore(const MachineInstr *MI, const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI, const MachineOperand &Op,
+ unsigned Val);
const GCNSubtarget *ST = nullptr;
InstCounterType MaxCounter = NUM_EXTENDED_INST_CNTS;
@@ -734,8 +735,7 @@ class SIInsertWaitcnts : public MachineFunctionPass {
RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
const MachineRegisterInfo *MRI,
const SIRegisterInfo *TRI,
- unsigned OpNo) const {
- const MachineOperand &Op = MI->getOperand(OpNo);
+ const MachineOperand &Op) const {
if (!TRI->isInAllocatableClass(Op.getReg()))
return {-1, -1};
@@ -773,12 +773,11 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
}
void WaitcntBrackets::setExpScore(const MachineInstr *MI,
- const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI, unsigned OpNo,
- unsigned Val) {
- RegInterval Interval = getRegInterval(MI, MRI, TRI, OpNo);
- assert(TRI->isVectorRegister(*MRI, MI->getOperand(OpNo).getReg()));
+ const MachineRegisterInfo *MRI,
+ const MachineOperand &Op, unsigned Val) {
+ RegInterval Interval = getRegInterval(MI, MRI, TRI, Op);
+ assert(TRI->isVectorRegister(*MRI, Op.getReg()));
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
setRegScore(RegNo, EXP_CNT, Val);
}
@@ -804,79 +803,57 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
// Put score on the source vgprs. If this is a store, just use those
// specific register(s).
if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
- int AddrOpIdx =
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
// All GDS operations must protect their address register (same as
// export.)
- if (AddrOpIdx != -1) {
- setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
- }
+ if (const auto *AddrOp = TII->getNamedOperand(Inst, AMDGPU::OpName::addr))
+ setExpScore(&Inst, TRI, MRI, *AddrOp, CurrScore);
if (Inst.mayStore()) {
- if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data0)) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
- CurrScore);
- }
- if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data1)) {
- setExpScore(&Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
- AMDGPU::OpName::data1),
- CurrScore);
- }
+ if (const auto *Data0 =
+ TII->getNamedOperand(Inst, AMDGPU::OpName::data0))
+ setExpScore(&Inst, TRI, MRI, *Data0, CurrScore);
+ if (const auto *Data1 =
+ TII->getNamedOperand(Inst, AMDGPU::OpName::data1))
+ setExpScore(&Inst, TRI, MRI, *Data1, CurrScore);
} else if (SIInstrInfo::isAtomicRet(Inst) && !SIInstrInfo::isGWS(Inst) &&
Inst.getOpcode() != AMDGPU::DS_APPEND &&
Inst.getOpcode() != AMDGPU::DS_CONSUME &&
Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
- for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
- const MachineOperand &Op = Inst.getOperand(I);
- if (Op.isReg() && !Op.isDef() &&
- TRI->isVectorRegister(*MRI, Op.getReg())) {
- setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
- }
+ for (const MachineOperand &Op : Inst.uses()) {
+ if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
+ setExpScore(&Inst, TRI, MRI, Op, CurrScore);
}
}
} else if (TII->isFLAT(Inst)) {
- if (Inst.mayStore()) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
- CurrScore);
- } else if (SIInstrInfo::isAtomicRet(Inst)) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
- CurrScore);
- }
+ if (Inst.mayStore())
+ setExpScore(&Inst, TRI, MRI,
+ *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+ CurrScore);
+ else if (SIInstrInfo::isAtomicRet(Inst))
+ setExpScore(&Inst, TRI, MRI,
+ *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+ CurrScore);
} else if (TII->isMIMG(Inst)) {
- if (Inst.mayStore()) {
- setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
- } else if (SIInstrInfo::isAtomicRet(Inst)) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
- CurrScore);
- }
+ if (Inst.mayStore())
+ setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
+ else if (SIInstrInfo::isAtomicRet(Inst))
+ setExpScore(&Inst, TRI, MRI,
+ *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+ CurrScore);
} else if (TII->isMTBUF(Inst)) {
- if (Inst.mayStore()) {
- setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
- }
+ if (Inst.mayStore())
+ setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
} else if (TII->isMUBUF(Inst)) {
- if (Inst.mayStore()) {
- setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
- } else if (SIInstrInfo::isAtomicRet(Inst)) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
- CurrScore);
- }
+ if (Inst.mayStore())
+ setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
+ else if (SIInstrInfo::isAtomicRet(Inst))
+ setExpScore(&Inst, TRI, MRI,
+ *TII->getNamedOperand(Inst, AMDGPU::OpName::data),
+ CurrScore);
} else if (TII->isLDSDIR(Inst)) {
// LDSDIR instructions attach the score to the destination.
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdst),
- CurrScore);
+ setExpScore(&Inst, TRI, MRI,
+ *TII->getNamedOperand(Inst, AMDGPU::OpName::vdst), CurrScore);
} else {
if (TII->isEXP(Inst)) {
// For export the destination registers are really temps that
@@ -891,12 +868,9 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
}
}
}
- for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
- MachineOperand &MO = Inst.getOperand(I);
- if (MO.isReg() && !MO.isDef() &&
- TRI->isVectorRegister(*MRI, MO.getReg())) {
- setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
- }
+ for (const MachineOperand &Op : Inst.uses()) {
+ if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
+ setExpScore(&Inst, TRI, MRI, Op, CurrScore);
}
}
} else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
@@ -907,14 +881,10 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
// artificial dependency, while these are there only for register liveness
// accounting purposes.
//
- // Special cases where implicit register defs and uses exists, such as
- // M0, FLAT_SCR or VCC, but the wait will be generated earlier in the
- // generateWaitcntInstBefore() if that was loaded from memory.
- for (unsigned I = 0, E = Inst.getNumExplicitOperands(); I != E; ++I) {
- auto &Op = Inst.getOperand(I);
- if (!Op.isReg() || !Op.isDef())
- continue;
- RegInterval Interval = getRegInterval(&Inst, MRI, TRI, I);
+ // Special cases where implicit register defs exist, such as M0 or VCC,
+ // but none of these occur on memory instructions.
+ for (const MachineOperand &Op : Inst.defs()) {
+ RegInterval Interval = getRegInterval(&Inst, MRI, TRI, Op);
if (T == LOAD_CNT || T == SAMPLE_CNT || T == BVH_CNT) {
if (Interval.first >= NUM_ALL_VGPRS)
continue;
@@ -1692,22 +1662,19 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// load). We also need to check WAW dependency with saved PC.
Wait = AMDGPU::Waitcnt();
- int CallAddrOpIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
-
- if (MI.getOperand(CallAddrOpIdx).isReg()) {
+ const auto *CallAddrOp = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ if (CallAddrOp->isReg()) {
RegInterval CallAddrOpInterval =
- ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOpIdx);
+ ScoreBrackets.getRegInterval(&MI, MRI, TRI, *CallAddrOp);
for (int RegNo = CallAddrOpInterval.first;
RegNo < CallAddrOpInterval.second; ++RegNo)
ScoreBrackets.determineWait(SmemAccessCounter, RegNo, Wait);
- int RtnAddrOpIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
- if (RtnAddrOpIdx != -1) {
+ if (const auto *RtnAddrOp =
+ TII->getNamedOperand(MI, AMDGPU::OpName::dst)) {
RegInterval RtnAddrOpInterval =
- ScoreBrackets.getRegInterval(&MI, MRI, TRI, RtnAddrOpIdx);
+ ScoreBrackets.getRegInterval(&MI, MRI, TRI, *RtnAddrOp);
for (int RegNo = RtnAddrOpInterval.first;
RegNo < RtnAddrOpInterval.second; ++RegNo)
@@ -1769,8 +1736,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
}
// Loop over use and def operands.
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- MachineOperand &Op = MI.getOperand(I);
+ for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg())
continue;
@@ -1778,7 +1744,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
if (Op.isTied() && Op.isUse() && TII->doesNotReadTiedSource(MI))
continue;
- RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, I);
+ RegInterval Interval = ScoreBrackets.getRegInterval(&MI, MRI, TRI, Op);
const bool IsVGPR = TRI->isVectorRegister(*MRI, Op.getReg());
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
@@ -2357,34 +2323,35 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
if (MI.mayStore())
HasVMemStore = true;
}
- for (unsigned I = 0; I < MI.getNumOperands(); I++) {
- MachineOperand &Op = MI.getOperand(I);
+ for (const MachineOperand &Op : MI.uses()) {
if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg()))
continue;
- RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, I);
+ RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
// Vgpr use
- if (Op.isUse()) {
- for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
- // If we find a register that is loaded inside the loop, 1. and 2.
- // are invalidated and we can exit.
- if (VgprDef.contains(RegNo))
- return false;
- VgprUse.insert(RegNo);
- // If at least one of Op's registers is in the score brackets, the
- // value is likely loaded outside of the loop.
- if (Brackets.getRegScore(RegNo, LOAD_CNT) >
- Brackets.getScoreLB(LOAD_CNT) ||
- Brackets.getRegScore(RegNo, SAMPLE_CNT) >
- Brackets.getScoreLB(SAMPLE_CNT) ||
- Brackets.getRegScore(RegNo, BVH_CNT) >
- Brackets.getScoreLB(BVH_CNT)) {
- UsesVgprLoadedOutside = true;
- break;
- }
+ for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ // If we find a register that is loaded inside the loop, 1. and 2.
+ // are invalidated and we can exit.
+ if (VgprDef.contains(RegNo))
+ return false;
+ VgprUse.insert(RegNo);
+ // If at least one of Op's registers is in the score brackets, the
+ // value is likely loaded outside of the loop.
+ if (Brackets.getRegScore(RegNo, LOAD_CNT) >
+ Brackets.getScoreLB(LOAD_CNT) ||
+ Brackets.getRegScore(RegNo, SAMPLE_CNT) >
+ Brackets.getScoreLB(SAMPLE_CNT) ||
+ Brackets.getRegScore(RegNo, BVH_CNT) >
+ Brackets.getScoreLB(BVH_CNT)) {
+ UsesVgprLoadedOutside = true;
+ break;
}
}
- // VMem load vgpr def
- else if (isVMEMOrFlatVMEM(MI) && MI.mayLoad() && Op.isDef())
+ }
+
+ // VMem load vgpr def
+ if (isVMEMOrFlatVMEM(MI) && MI.mayLoad()) {
+ for (const MachineOperand &Op : MI.defs()) {
+ RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
// If we find a register that is loaded inside the loop, 1. and 2.
// are invalidated and we can exit.
@@ -2392,6 +2359,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
return false;
VgprDef.insert(RegNo);
}
+ }
}
}
}
From d4de275538acf19897f8f2c4886b10c433af4635 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Tue, 17 Sep 2024 02:02:04 -0700
Subject: [PATCH 2/2] Address review comments
- Braces
- uses/defs -> all_uses/all_defs
---
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 29 ++++++++++++---------
1 file changed, 16 insertions(+), 13 deletions(-)
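Why the review asked for all_uses()/all_defs(): MachineInstr::uses() and defs() are positional ranges — defs() covers only the explicit defs at the front of the operand list, and uses() covers everything after them, which can include implicit defs. all_uses() and all_defs() are filtered ranges that visit exactly the register operands with isUse()/isDef() set, implicit operands included. A rough sketch of the distinction, assuming the MachineInstr.h semantics current at the time of the patch:

    // Positional ranges:
    //   defs() == [operands_begin(), operands_begin() + getNumExplicitDefs())
    //   uses() == [operands_begin() + getNumExplicitDefs(), operands_end())
    // so uses() can yield implicit defs, and defs() misses implicit defs.

    // Filtered ranges visit every register operand with the matching flag:
    for (const MachineOperand &Op : Inst.all_uses()) {
      // Op.isReg() && Op.isUse() hold by construction here; the extra
      // Op.isReg() check retained in the patch is redundant but harmless.
    }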
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 6906784cdafb24..fd9fe1196b7853 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -819,37 +819,40 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
Inst.getOpcode() != AMDGPU::DS_APPEND &&
Inst.getOpcode() != AMDGPU::DS_CONSUME &&
Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
- for (const MachineOperand &Op : Inst.uses()) {
+ for (const MachineOperand &Op : Inst.all_uses()) {
if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
setExpScore(&Inst, TRI, MRI, Op, CurrScore);
}
}
} else if (TII->isFLAT(Inst)) {
- if (Inst.mayStore())
+ if (Inst.mayStore()) {
setExpScore(&Inst, TRI, MRI,
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
CurrScore);
- else if (SIInstrInfo::isAtomicRet(Inst))
+ } else if (SIInstrInfo::isAtomicRet(Inst)) {
setExpScore(&Inst, TRI, MRI,
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
CurrScore);
+ }
} else if (TII->isMIMG(Inst)) {
- if (Inst.mayStore())
+ if (Inst.mayStore()) {
setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
- else if (SIInstrInfo::isAtomicRet(Inst))
+ } else if (SIInstrInfo::isAtomicRet(Inst)) {
setExpScore(&Inst, TRI, MRI,
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
CurrScore);
+ }
} else if (TII->isMTBUF(Inst)) {
if (Inst.mayStore())
setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
} else if (TII->isMUBUF(Inst)) {
- if (Inst.mayStore())
+ if (Inst.mayStore()) {
setExpScore(&Inst, TRI, MRI, Inst.getOperand(0), CurrScore);
- else if (SIInstrInfo::isAtomicRet(Inst))
+ } else if (SIInstrInfo::isAtomicRet(Inst)) {
setExpScore(&Inst, TRI, MRI,
*TII->getNamedOperand(Inst, AMDGPU::OpName::data),
CurrScore);
+ }
} else if (TII->isLDSDIR(Inst)) {
// LDSDIR instructions attach the score to the destination.
setExpScore(&Inst, TRI, MRI,
@@ -868,7 +871,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
}
}
}
- for (const MachineOperand &Op : Inst.uses()) {
+ for (const MachineOperand &Op : Inst.all_uses()) {
if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
setExpScore(&Inst, TRI, MRI, Op, CurrScore);
}
@@ -1662,10 +1665,10 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// load). We also need to check WAW dependency with saved PC.
Wait = AMDGPU::Waitcnt();
- const auto *CallAddrOp = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
- if (CallAddrOp->isReg()) {
+ const auto &CallAddrOp = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ if (CallAddrOp.isReg()) {
RegInterval CallAddrOpInterval =
- ScoreBrackets.getRegInterval(&MI, MRI, TRI, *CallAddrOp);
+ ScoreBrackets.getRegInterval(&MI, MRI, TRI, CallAddrOp);
for (int RegNo = CallAddrOpInterval.first;
RegNo < CallAddrOpInterval.second; ++RegNo)
@@ -2323,7 +2326,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
if (MI.mayStore())
HasVMemStore = true;
}
- for (const MachineOperand &Op : MI.uses()) {
+ for (const MachineOperand &Op : MI.all_uses()) {
if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg()))
continue;
RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
@@ -2350,7 +2353,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
// VMem load vgpr def
if (isVMEMOrFlatVMEM(MI) && MI.mayLoad()) {
- for (const MachineOperand &Op : MI.defs()) {
+ for (const MachineOperand &Op : MI.all_defs()) {
RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
// If we find a register that is loaded inside the loop, 1. and 2.