[llvm] [NFC][llvm] Fix incomplete type issues in LLVM (PR #182655)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 20 23:45:21 PST 2026
https://github.com/2876225417 updated https://github.com/llvm/llvm-project/pull/182655
From f55c2c0e60bbcbd3e79a929911a049846f6e1417 Mon Sep 17 00:00:00 2001
From: ppqwqqq <2876225417 at qq.com>
Date: Sat, 21 Feb 2026 13:04:40 +0800
Subject: [PATCH] [llvm] Fix incomplete type issues in LLVM
---
llvm/include/llvm/ADT/STLExtras.h | 3 +
llvm/include/llvm/IR/CFG.h | 1 +
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 542 ++++++++++----------
llvm/lib/Target/BPF/BPFAsmPrinter.cpp | 4 +
llvm/lib/Target/BPF/BPFAsmPrinter.h | 3 +-
5 files changed, 291 insertions(+), 262 deletions(-)
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 80c97e77724e9..c8276b17014e1 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -1221,6 +1221,9 @@ class indexed_accessor_range_base {
class iterator : public indexed_accessor_iterator<iterator, BaseT, T,
PointerT, ReferenceT> {
public:
+ // Default-constructed iterator: value-initialized base object, index 0.
+ iterator()
+ : iterator::indexed_accessor_iterator(BaseT(), 0) {}
// Index into this iterator, invoking a static method on the derived type.
ReferenceT operator*() const {
return DerivedT::dereference_iterator(this->getBase(), this->getIndex());
diff --git a/llvm/include/llvm/IR/CFG.h b/llvm/include/llvm/IR/CFG.h
index 96d3b2fbb5b0b..da1c1f645d25d 100644
--- a/llvm/include/llvm/IR/CFG.h
+++ b/llvm/include/llvm/IR/CFG.h
@@ -180,6 +180,7 @@ class SuccIterator
};
public:
+ SuccIterator() : Inst(nullptr), Idx(0) {}
// begin iterator
explicit inline SuccIterator(InstructionT *Inst) : Inst(Inst), Idx(0) {}
// end iterator
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 12361da511f5a..32e32300acb6d 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -487,192 +487,7 @@ class WaitcntGeneratorGFX12Plus final : public WaitcntGenerator {
AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
};
-// Flags indicating which counters should be flushed in a loop preheader.
-struct PreheaderFlushFlags {
- bool FlushVmCnt = false;
- bool FlushDsCnt = false;
-};
-
-class SIInsertWaitcnts {
-public:
- const GCNSubtarget *ST;
- const SIInstrInfo *TII = nullptr;
- const SIRegisterInfo *TRI = nullptr;
- const MachineRegisterInfo *MRI = nullptr;
- InstCounterType SmemAccessCounter;
- InstCounterType MaxCounter;
- bool IsExpertMode = false;
-
-private:
- DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses;
- DenseMap<MachineBasicBlock *, PreheaderFlushFlags> PreheadersToFlush;
- MachineLoopInfo *MLI;
- MachinePostDominatorTree *PDT;
- AliasAnalysis *AA = nullptr;
-
- struct BlockInfo {
- std::unique_ptr<WaitcntBrackets> Incoming;
- bool Dirty = true;
- };
-
- MapVector<MachineBasicBlock *, BlockInfo> BlockInfos;
-
- bool ForceEmitWaitcnt[NUM_INST_CNTS];
-
- std::unique_ptr<WaitcntGenerator> WCG;
-
- // Remember call and return instructions in the function.
- DenseSet<MachineInstr *> CallInsts;
- DenseSet<MachineInstr *> ReturnInsts;
-
- // Remember all S_ENDPGM instructions. The boolean flag is true if there might
- // be outstanding stores but definitely no outstanding scratch stores, to help
- // with insertion of DEALLOC_VGPRS messages.
- DenseMap<MachineInstr *, bool> EndPgmInsts;
-
- AMDGPU::HardwareLimits Limits;
-
-public:
- SIInsertWaitcnts(MachineLoopInfo *MLI, MachinePostDominatorTree *PDT,
- AliasAnalysis *AA)
- : MLI(MLI), PDT(PDT), AA(AA) {
- (void)ForceExpCounter;
- (void)ForceLgkmCounter;
- (void)ForceVMCounter;
- }
-
- const AMDGPU::HardwareLimits &getLimits() const { return Limits; }
-
- PreheaderFlushFlags getPreheaderFlushFlags(MachineLoop *ML,
- const WaitcntBrackets &Brackets);
- PreheaderFlushFlags isPreheaderToFlush(MachineBasicBlock &MBB,
- const WaitcntBrackets &ScoreBrackets);
- bool isVMEMOrFlatVMEM(const MachineInstr &MI) const;
- bool isDSRead(const MachineInstr &MI) const;
- bool mayStoreIncrementingDSCNT(const MachineInstr &MI) const;
- bool run(MachineFunction &MF);
-
- void setForceEmitWaitcnt() {
-// For non-debug builds, ForceEmitWaitcnt has been initialized to false;
-// For debug builds, get the debug counter info and adjust if need be
-#ifndef NDEBUG
- if (DebugCounter::isCounterSet(ForceExpCounter) &&
- DebugCounter::shouldExecute(ForceExpCounter)) {
- ForceEmitWaitcnt[EXP_CNT] = true;
- } else {
- ForceEmitWaitcnt[EXP_CNT] = false;
- }
-
- if (DebugCounter::isCounterSet(ForceLgkmCounter) &&
- DebugCounter::shouldExecute(ForceLgkmCounter)) {
- ForceEmitWaitcnt[DS_CNT] = true;
- ForceEmitWaitcnt[KM_CNT] = true;
- } else {
- ForceEmitWaitcnt[DS_CNT] = false;
- ForceEmitWaitcnt[KM_CNT] = false;
- }
-
- if (DebugCounter::isCounterSet(ForceVMCounter) &&
- DebugCounter::shouldExecute(ForceVMCounter)) {
- ForceEmitWaitcnt[LOAD_CNT] = true;
- ForceEmitWaitcnt[SAMPLE_CNT] = true;
- ForceEmitWaitcnt[BVH_CNT] = true;
- } else {
- ForceEmitWaitcnt[LOAD_CNT] = false;
- ForceEmitWaitcnt[SAMPLE_CNT] = false;
- ForceEmitWaitcnt[BVH_CNT] = false;
- }
-
- ForceEmitWaitcnt[VA_VDST] = false;
- ForceEmitWaitcnt[VM_VSRC] = false;
-#endif // NDEBUG
- }
-
- // Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
- // instruction.
- WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
- switch (Inst.getOpcode()) {
- // FIXME: GLOBAL_INV needs to be tracked with xcnt too.
- case AMDGPU::GLOBAL_INV:
- return GLOBAL_INV_ACCESS; // tracked using loadcnt, but doesn't write
- // VGPRs
- case AMDGPU::GLOBAL_WB:
- case AMDGPU::GLOBAL_WBINV:
- return VMEM_WRITE_ACCESS; // tracked using storecnt
- default:
- break;
- }
-
- // Maps VMEM access types to their corresponding WaitEventType.
- static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
- VMEM_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
-
- assert(SIInstrInfo::isVMEM(Inst));
- // LDS DMA loads are also stores, but on the LDS side. On the VMEM side
- // these should use VM_CNT.
- if (!ST->hasVscnt() || SIInstrInfo::mayWriteLDSThroughDMA(Inst))
- return VMEM_ACCESS;
- if (Inst.mayStore() &&
- (!Inst.mayLoad() || SIInstrInfo::isAtomicNoRet(Inst))) {
- if (TII->mayAccessScratch(Inst))
- return SCRATCH_WRITE_ACCESS;
- return VMEM_WRITE_ACCESS;
- }
- if (!ST->hasExtendedWaitCounts() || SIInstrInfo::isFLAT(Inst))
- return VMEM_ACCESS;
- return VmemReadMapping[getVmemType(Inst)];
- }
-
- std::optional<WaitEventType>
- getExpertSchedulingEventType(const MachineInstr &Inst) const;
-
- bool isAsync(const MachineInstr &MI) const {
- if (!SIInstrInfo::isLDSDMA(MI))
- return false;
- if (SIInstrInfo::usesASYNC_CNT(MI))
- return true;
- const MachineOperand *Async =
- TII->getNamedOperand(MI, AMDGPU::OpName::IsAsync);
- return Async && (Async->getImm());
- }
-
- bool isNonAsyncLdsDmaWrite(const MachineInstr &MI) const {
- return SIInstrInfo::mayWriteLDSThroughDMA(MI) && !isAsync(MI);
- }
-
- bool isAsyncLdsDmaWrite(const MachineInstr &MI) const {
- return SIInstrInfo::mayWriteLDSThroughDMA(MI) && isAsync(MI);
- }
-
- bool isVmemAccess(const MachineInstr &MI) const;
- bool generateWaitcntInstBefore(MachineInstr &MI,
- WaitcntBrackets &ScoreBrackets,
- MachineInstr *OldWaitcntInstr,
- PreheaderFlushFlags FlushFlags);
- bool generateWaitcnt(AMDGPU::Waitcnt Wait,
- MachineBasicBlock::instr_iterator It,
- MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets,
- MachineInstr *OldWaitcntInstr);
- void updateEventWaitcntAfter(MachineInstr &Inst,
- WaitcntBrackets *ScoreBrackets);
- bool isNextENDPGM(MachineBasicBlock::instr_iterator It,
- MachineBasicBlock *Block) const;
- bool insertForcedWaitAfter(MachineInstr &Inst, MachineBasicBlock &Block,
- WaitcntBrackets &ScoreBrackets);
- bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block,
- WaitcntBrackets &ScoreBrackets);
- /// Removes redundant Soft Xcnt Waitcnts in \p Block emitted by the Memory
- /// Legalizer. Returns true if block was modified.
- bool removeRedundantSoftXcnts(MachineBasicBlock &Block);
- void setSchedulingMode(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- bool ExpertMode) const;
- const WaitEventSet &getWaitEvents(InstCounterType T) const {
- return WCG->getWaitEvents(T);
- }
- InstCounterType getCounterFromEvent(WaitEventType E) const {
- return WCG->getCounterFromEvent(E);
- }
-};
+class SIInsertWaitcnts;
// This objects maintains the current score brackets of each wait counter, and
// a per-register scoreboard for each wait counter.
@@ -684,10 +499,7 @@ class SIInsertWaitcnts {
// "s_waitcnt 0" before use.
class WaitcntBrackets {
public:
- WaitcntBrackets(const SIInsertWaitcnts *Context) : Context(Context) {
- assert(Context->TRI->getNumRegUnits() < REGUNITS_END);
- }
-
+ WaitcntBrackets(const SIInsertWaitcnts *Context);
#ifndef NDEBUG
~WaitcntBrackets() {
unsigned NumUnusedVmem = 0, NumUnusedSGPRs = 0;
@@ -709,9 +521,7 @@ class WaitcntBrackets {
}
#endif
- bool isSmemCounter(InstCounterType T) const {
- return T == Context->SmemAccessCounter || T == X_CNT;
- }
+ bool isSmemCounter(InstCounterType T) const;
unsigned getSgprScoresIdx(InstCounterType T) const {
assert(isSmemCounter(T) && "Invalid SMEM counter");
@@ -785,18 +595,8 @@ class WaitcntBrackets {
bool hasPendingEvent(WaitEventType E) const {
return PendingEvents.contains(E);
}
- bool hasPendingEvent(InstCounterType T) const {
- bool HasPending = PendingEvents & Context->getWaitEvents(T);
- assert(HasPending == !empty(T) &&
- "Expected pending events iff scoreboard is not empty");
- return HasPending;
- }
-
- bool hasMixedPendingEvents(InstCounterType T) const {
- WaitEventSet Events = PendingEvents & Context->getWaitEvents(T);
- // Return true if more than one bit is set in Events.
- return Events.twoOrMore();
- }
+ bool hasPendingEvent(InstCounterType T) const;
+ bool hasMixedPendingEvents(InstCounterType T) const;
bool hasPendingFlat() const {
return ((LastFlat[DS_CNT] > ScoreLBs[DS_CNT] &&
@@ -814,23 +614,13 @@ class WaitcntBrackets {
return LastGDS > ScoreLBs[DS_CNT] && LastGDS <= ScoreUBs[DS_CNT];
}
- unsigned getPendingGDSWait() const {
- return std::min(getScoreUB(DS_CNT) - LastGDS,
- getWaitCountMax(Context->getLimits(), DS_CNT) - 1);
- }
+ unsigned getPendingGDSWait() const;
void setPendingGDS() { LastGDS = ScoreUBs[DS_CNT]; }
// Return true if there might be pending writes to the vgpr-interval by VMEM
// instructions with types different from V.
- bool hasOtherPendingVmemTypes(MCPhysReg Reg, VmemType V) const {
- for (MCRegUnit RU : regunits(Reg)) {
- auto It = VMem.find(toVMEMID(RU));
- if (It != VMem.end() && (It->second.VMEMTypes & ~(1 << V)))
- return true;
- }
- return false;
- }
+ bool hasOtherPendingVmemTypes(MCPhysReg Reg, VmemType V) const;
void clearVgprVmemTypes(MCPhysReg Reg) {
for (MCRegUnit RU : regunits(Reg)) {
@@ -842,11 +632,7 @@ class WaitcntBrackets {
}
}
- void setStateOnFunctionEntryOrReturn() {
- setScoreUB(STORE_CNT, getScoreUB(STORE_CNT) +
- getWaitCountMax(Context->getLimits(), STORE_CNT));
- PendingEvents |= Context->getWaitEvents(STORE_CNT);
- }
+ void setStateOnFunctionEntryOrReturn();
ArrayRef<const MachineInstr *> getLDSDMAStores() const {
return LDSDMAStores;
@@ -881,50 +667,15 @@ class WaitcntBrackets {
bool mergeAsyncMarks(ArrayRef<MergeInfo> MergeInfos,
ArrayRef<CounterValueArray> OtherMarks);
- iterator_range<MCRegUnitIterator> regunits(MCPhysReg Reg) const {
- assert(Reg != AMDGPU::SCC && "Shouldn't be used on SCC");
- if (!Context->TRI->isInAllocatableClass(Reg))
- return {{}, {}};
- const TargetRegisterClass *RC = Context->TRI->getPhysRegBaseClass(Reg);
- unsigned Size = Context->TRI->getRegSizeInBits(*RC);
- if (Size == 16 && Context->ST->hasD16Writes32BitVgpr())
- Reg = Context->TRI->get32BitRegister(Reg);
- return Context->TRI->regunits(Reg);
- }
+ iterator_range<MCRegUnitIterator> regunits(MCPhysReg Reg) const;
void setScoreLB(InstCounterType T, unsigned Val) {
assert(T < NUM_INST_CNTS);
ScoreLBs[T] = Val;
}
- void setScoreUB(InstCounterType T, unsigned Val) {
- assert(T < NUM_INST_CNTS);
- ScoreUBs[T] = Val;
-
- if (T != EXP_CNT)
- return;
-
- if (getScoreRange(EXP_CNT) > getWaitCountMax(Context->getLimits(), EXP_CNT))
- ScoreLBs[EXP_CNT] =
- ScoreUBs[EXP_CNT] - getWaitCountMax(Context->getLimits(), EXP_CNT);
- }
-
- void setRegScore(MCPhysReg Reg, InstCounterType T, unsigned Val) {
- const SIRegisterInfo *TRI = Context->TRI;
- if (Reg == AMDGPU::SCC) {
- SCCScore = Val;
- } else if (TRI->isVectorRegister(*Context->MRI, Reg)) {
- for (MCRegUnit RU : regunits(Reg))
- VMem[toVMEMID(RU)].Scores[T] = Val;
- } else if (TRI->isSGPRReg(*Context->MRI, Reg)) {
- auto STy = getSgprScoresIdx(T);
- for (MCRegUnit RU : regunits(Reg))
- SGPRs[RU].Scores[STy] = Val;
- } else {
- llvm_unreachable("Register cannot be tracked/unknown register!");
- }
- }
-
+ void setScoreUB(InstCounterType T, unsigned Val);
+ void setRegScore(MCPhysReg Reg, InstCounterType T, unsigned Val);
void setVMemScore(VMEMID TID, InstCounterType T, unsigned Val) {
VMem[TID].Scores[T] = Val;
}
@@ -1000,6 +751,193 @@ class WaitcntBrackets {
CounterValueArray AsyncScore{};
};
+// Flags indicating which counters should be flushed in a loop preheader.
+struct PreheaderFlushFlags {
+ bool FlushVmCnt = false;
+ bool FlushDsCnt = false;
+};
+
+class SIInsertWaitcnts {
+public:
+ const GCNSubtarget *ST;
+ const SIInstrInfo *TII = nullptr;
+ const SIRegisterInfo *TRI = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ InstCounterType SmemAccessCounter;
+ InstCounterType MaxCounter;
+ bool IsExpertMode = false;
+
+private:
+ DenseMap<const Value *, MachineBasicBlock *> SLoadAddresses;
+ DenseMap<MachineBasicBlock *, PreheaderFlushFlags> PreheadersToFlush;
+ MachineLoopInfo *MLI;
+ MachinePostDominatorTree *PDT;
+ AliasAnalysis *AA = nullptr;
+
+ struct BlockInfo {
+ std::unique_ptr<WaitcntBrackets> Incoming;
+ bool Dirty = true;
+ };
+
+ MapVector<MachineBasicBlock *, BlockInfo> BlockInfos;
+
+ bool ForceEmitWaitcnt[NUM_INST_CNTS];
+
+ std::unique_ptr<WaitcntGenerator> WCG;
+
+ // Remember call and return instructions in the function.
+ DenseSet<MachineInstr *> CallInsts;
+ DenseSet<MachineInstr *> ReturnInsts;
+
+ // Remember all S_ENDPGM instructions. The boolean flag is true if there might
+ // be outstanding stores but definitely no outstanding scratch stores, to help
+ // with insertion of DEALLOC_VGPRS messages.
+ DenseMap<MachineInstr *, bool> EndPgmInsts;
+
+ AMDGPU::HardwareLimits Limits;
+
+public:
+ SIInsertWaitcnts(MachineLoopInfo *MLI, MachinePostDominatorTree *PDT,
+ AliasAnalysis *AA)
+ : MLI(MLI), PDT(PDT), AA(AA) {
+ (void)ForceExpCounter;
+ (void)ForceLgkmCounter;
+ (void)ForceVMCounter;
+ }
+
+ const AMDGPU::HardwareLimits &getLimits() const { return Limits; }
+
+ PreheaderFlushFlags getPreheaderFlushFlags(MachineLoop *ML,
+ const WaitcntBrackets &Brackets);
+ PreheaderFlushFlags isPreheaderToFlush(MachineBasicBlock &MBB,
+ const WaitcntBrackets &ScoreBrackets);
+ bool isVMEMOrFlatVMEM(const MachineInstr &MI) const;
+ bool isDSRead(const MachineInstr &MI) const;
+ bool mayStoreIncrementingDSCNT(const MachineInstr &MI) const;
+ bool run(MachineFunction &MF);
+
+ void setForceEmitWaitcnt() {
+// For non-debug builds, ForceEmitWaitcnt has been initialized to false;
+// For debug builds, get the debug counter info and adjust if need be
+#ifndef NDEBUG
+ if (DebugCounter::isCounterSet(ForceExpCounter) &&
+ DebugCounter::shouldExecute(ForceExpCounter)) {
+ ForceEmitWaitcnt[EXP_CNT] = true;
+ } else {
+ ForceEmitWaitcnt[EXP_CNT] = false;
+ }
+
+ if (DebugCounter::isCounterSet(ForceLgkmCounter) &&
+ DebugCounter::shouldExecute(ForceLgkmCounter)) {
+ ForceEmitWaitcnt[DS_CNT] = true;
+ ForceEmitWaitcnt[KM_CNT] = true;
+ } else {
+ ForceEmitWaitcnt[DS_CNT] = false;
+ ForceEmitWaitcnt[KM_CNT] = false;
+ }
+
+ if (DebugCounter::isCounterSet(ForceVMCounter) &&
+ DebugCounter::shouldExecute(ForceVMCounter)) {
+ ForceEmitWaitcnt[LOAD_CNT] = true;
+ ForceEmitWaitcnt[SAMPLE_CNT] = true;
+ ForceEmitWaitcnt[BVH_CNT] = true;
+ } else {
+ ForceEmitWaitcnt[LOAD_CNT] = false;
+ ForceEmitWaitcnt[SAMPLE_CNT] = false;
+ ForceEmitWaitcnt[BVH_CNT] = false;
+ }
+
+ ForceEmitWaitcnt[VA_VDST] = false;
+ ForceEmitWaitcnt[VM_VSRC] = false;
+#endif // NDEBUG
+ }
+
+ // Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
+ // instruction.
+ WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
+ switch (Inst.getOpcode()) {
+ // FIXME: GLOBAL_INV needs to be tracked with xcnt too.
+ case AMDGPU::GLOBAL_INV:
+ return GLOBAL_INV_ACCESS; // tracked using loadcnt, but doesn't write
+ // VGPRs
+ case AMDGPU::GLOBAL_WB:
+ case AMDGPU::GLOBAL_WBINV:
+ return VMEM_WRITE_ACCESS; // tracked using storecnt
+ default:
+ break;
+ }
+
+ // Maps VMEM access types to their corresponding WaitEventType.
+ static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
+ VMEM_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
+
+ assert(SIInstrInfo::isVMEM(Inst));
+ // LDS DMA loads are also stores, but on the LDS side. On the VMEM side
+ // these should use VM_CNT.
+ if (!ST->hasVscnt() || SIInstrInfo::mayWriteLDSThroughDMA(Inst))
+ return VMEM_ACCESS;
+ if (Inst.mayStore() &&
+ (!Inst.mayLoad() || SIInstrInfo::isAtomicNoRet(Inst))) {
+ if (TII->mayAccessScratch(Inst))
+ return SCRATCH_WRITE_ACCESS;
+ return VMEM_WRITE_ACCESS;
+ }
+ if (!ST->hasExtendedWaitCounts() || SIInstrInfo::isFLAT(Inst))
+ return VMEM_ACCESS;
+ return VmemReadMapping[getVmemType(Inst)];
+ }
+
+ std::optional<WaitEventType>
+ getExpertSchedulingEventType(const MachineInstr &Inst) const;
+
+ bool isAsync(const MachineInstr &MI) const {
+ if (!SIInstrInfo::isLDSDMA(MI))
+ return false;
+ if (SIInstrInfo::usesASYNC_CNT(MI))
+ return true;
+ const MachineOperand *Async =
+ TII->getNamedOperand(MI, AMDGPU::OpName::IsAsync);
+ return Async && (Async->getImm());
+ }
+
+ bool isNonAsyncLdsDmaWrite(const MachineInstr &MI) const {
+ return SIInstrInfo::mayWriteLDSThroughDMA(MI) && !isAsync(MI);
+ }
+
+ bool isAsyncLdsDmaWrite(const MachineInstr &MI) const {
+ return SIInstrInfo::mayWriteLDSThroughDMA(MI) && isAsync(MI);
+ }
+
+ bool isVmemAccess(const MachineInstr &MI) const;
+ bool generateWaitcntInstBefore(MachineInstr &MI,
+ WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr,
+ PreheaderFlushFlags FlushFlags);
+ bool generateWaitcnt(AMDGPU::Waitcnt Wait,
+ MachineBasicBlock::instr_iterator It,
+ MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr);
+ void updateEventWaitcntAfter(MachineInstr &Inst,
+ WaitcntBrackets *ScoreBrackets);
+ bool isNextENDPGM(MachineBasicBlock::instr_iterator It,
+ MachineBasicBlock *Block) const;
+ bool insertForcedWaitAfter(MachineInstr &Inst, MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets);
+ bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets);
+ /// Removes redundant Soft Xcnt Waitcnts in \p Block emitted by the Memory
+ /// Legalizer. Returns true if block was modified.
+ bool removeRedundantSoftXcnts(MachineBasicBlock &Block);
+ void setSchedulingMode(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ bool ExpertMode) const;
+ const WaitEventSet &getWaitEvents(InstCounterType T) const {
+ return WCG->getWaitEvents(T);
+ }
+ InstCounterType getCounterFromEvent(WaitEventType E) const {
+ return WCG->getCounterFromEvent(E);
+ }
+};
+
class SIInsertWaitcntsLegacy : public MachineFunctionPass {
public:
static char ID;
@@ -1021,6 +959,90 @@ class SIInsertWaitcntsLegacy : public MachineFunctionPass {
}
};
+WaitcntBrackets::WaitcntBrackets(const SIInsertWaitcnts *Context)
+ : Context{Context} {
+ assert(Context->TRI->getNumRegUnits() < REGUNITS_END);
+}
+
+bool WaitcntBrackets::isSmemCounter(InstCounterType T) const {
+ return T == Context->SmemAccessCounter || T == X_CNT;
+}
+
+bool WaitcntBrackets::hasPendingEvent(InstCounterType T) const {
+ bool HasPending = PendingEvents & Context->getWaitEvents(T);
+ assert(HasPending == !empty(T) &&
+ "Expected pending events iff scoreboard is not empty");
+ return HasPending;
+}
+
+bool WaitcntBrackets::hasMixedPendingEvents(InstCounterType T) const {
+ WaitEventSet Events = PendingEvents & Context->getWaitEvents(T);
+ // Return true if more than one bit is set in Events.
+ return Events.twoOrMore();
+}
+
+unsigned WaitcntBrackets::getPendingGDSWait() const {
+ return std::min(getScoreUB(DS_CNT) - LastGDS,
+ getWaitCountMax(Context->getLimits(), DS_CNT) - 1);
+}
+
+bool WaitcntBrackets::hasOtherPendingVmemTypes(MCPhysReg Reg,
+ VmemType V) const {
+ for (MCRegUnit RU : regunits(Reg)) {
+ auto It = VMem.find(toVMEMID(RU));
+ if (It != VMem.end() && (It->second.VMEMTypes & ~(1 << V)))
+ return true;
+ }
+ return false;
+}
+
+void WaitcntBrackets::setStateOnFunctionEntryOrReturn() {
+ setScoreUB(STORE_CNT, getScoreUB(STORE_CNT) +
+ getWaitCountMax(Context->getLimits(), STORE_CNT));
+ PendingEvents |= Context->getWaitEvents(STORE_CNT);
+}
+
+iterator_range<MCRegUnitIterator>
+WaitcntBrackets::regunits(MCPhysReg Reg) const {
+ assert(Reg != AMDGPU::SCC && "Shouldn't be used on SCC");
+ if (!Context->TRI->isInAllocatableClass(Reg))
+ return {{}, {}};
+ const TargetRegisterClass *RC = Context->TRI->getPhysRegBaseClass(Reg);
+ unsigned Size = Context->TRI->getRegSizeInBits(*RC);
+ if (Size == 16 && Context->ST->hasD16Writes32BitVgpr())
+ Reg = Context->TRI->get32BitRegister(Reg);
+ return Context->TRI->regunits(Reg);
+}
+
+void WaitcntBrackets::setScoreUB(InstCounterType T, unsigned Val) {
+ assert(T < NUM_INST_CNTS);
+ ScoreUBs[T] = Val;
+
+ if (T != EXP_CNT)
+ return;
+
+ if (getScoreRange(EXP_CNT) > getWaitCountMax(Context->getLimits(), EXP_CNT))
+ ScoreLBs[EXP_CNT] =
+ ScoreUBs[EXP_CNT] - getWaitCountMax(Context->getLimits(), EXP_CNT);
+}
+
+void WaitcntBrackets::setRegScore(MCPhysReg Reg, InstCounterType T,
+ unsigned Val) {
+ const SIRegisterInfo *TRI = Context->TRI;
+ if (Reg == AMDGPU::SCC) {
+ SCCScore = Val;
+ } else if (TRI->isVectorRegister(*Context->MRI, Reg)) {
+ for (MCRegUnit RU : regunits(Reg))
+ VMem[toVMEMID(RU)].Scores[T] = Val;
+ } else if (TRI->isSGPRReg(*Context->MRI, Reg)) {
+ auto STy = getSgprScoresIdx(T);
+ for (MCRegUnit RU : regunits(Reg))
+ SGPRs[RU].Scores[STy] = Val;
+ } else {
+ llvm_unreachable("Register cannot be tracked/unknown register!");
+ }
+}
+
} // end anonymous namespace
void WaitcntBrackets::setScoreByOperand(const MachineOperand &Op,
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index abe081c0c76fd..0c618d7582d4c 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -40,6 +40,10 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
+BPFAsmPrinter::BPFAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer), ID), BTF(nullptr), TM(TM) {}
+
bool BPFAsmPrinter::doInitialization(Module &M) {
AsmPrinter::doInitialization(M);
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.h b/llvm/lib/Target/BPF/BPFAsmPrinter.h
index 75a1d7ed9f884..1031a78a1fe2f 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.h
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.h
@@ -18,8 +18,7 @@ namespace llvm {
class BPFAsmPrinter : public AsmPrinter {
public:
explicit BPFAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer), ID), BTF(nullptr), TM(TM) {}
+ std::unique_ptr<MCStreamer> Streamer);
StringRef getPassName() const override { return "BPF Assembly Printer"; }
bool doInitialization(Module &M) override;
More information about the llvm-commits mailing list