[llvm] [AMDGPU] Factor out common code from SIInsertWaitcnts (PR #83018)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 26 07:44:40 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Christudasan Devadasan (cdevadas)
<details>
<summary>Changes</summary>
The SIInsertWaitcnts pass inserts the various wait counts required for the operands of memory operations. For a particular subtarget, a new waitcount insertion needs to be attempted after the Hazard Recognizer, which runs later in the pipeline than the point where SIInsertWaitcnts is currently placed.
This patch factors the common code out into Utils/AMDGPUWaitCountUtils so that most of it can also be used by the new waitcnt insertion pass.
---
Patch is 177.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83018.diff
6 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+358-2145)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1-2)
- (added) llvm/lib/Target/AMDGPU/Utils/AMDGPUWaitCountUtils.cpp (+1393)
- (added) llvm/lib/Target/AMDGPU/Utils/AMDGPUWaitCountUtils.h (+531)
- (modified) llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt (+1)
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+5-5)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index a6184c5e1e0487..19d5ae17d3ec17 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -28,6 +28,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/AMDGPUWaitCountUtils.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Sequence.h"
@@ -38,6 +39,7 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/TargetParser/TargetParser.h"
using namespace llvm;
+using namespace llvm::AMDGPU;
#define DEBUG_TYPE "si-insert-waitcnts"
@@ -53,1540 +55,229 @@ static cl::opt<bool> ForceEmitZeroFlag(
cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"),
cl::init(false), cl::Hidden);
-namespace {
-// Class of object that encapsulates latest instruction counter score
-// associated with the operand. Used for determining whether
-// s_waitcnt instruction needs to be emitted.
-
-enum InstCounterType {
- LOAD_CNT = 0, // VMcnt prior to gfx12.
- DS_CNT, // LKGMcnt prior to gfx12.
- EXP_CNT, //
- STORE_CNT, // VScnt in gfx10/gfx11.
- NUM_NORMAL_INST_CNTS,
- SAMPLE_CNT = NUM_NORMAL_INST_CNTS, // gfx12+ only.
- BVH_CNT, // gfx12+ only.
- KM_CNT, // gfx12+ only.
- NUM_EXTENDED_INST_CNTS,
- NUM_INST_CNTS = NUM_EXTENDED_INST_CNTS
-};
-} // namespace
-
-namespace llvm {
-template <> struct enum_iteration_traits<InstCounterType> {
- static constexpr bool is_iterable = true;
-};
-} // namespace llvm
-
-namespace {
-// Return an iterator over all counters between LOAD_CNT (the first counter)
-// and \c MaxCounter (exclusive, default value yields an enumeration over
-// all counters).
-auto inst_counter_types(InstCounterType MaxCounter = NUM_INST_CNTS) {
- return enum_seq(LOAD_CNT, MaxCounter);
-}
-
-using RegInterval = std::pair<int, int>;
-
-struct HardwareLimits {
- unsigned LoadcntMax; // Corresponds to VMcnt prior to gfx12.
- unsigned ExpcntMax;
- unsigned DscntMax; // Corresponds to LGKMcnt prior to gfx12.
- unsigned StorecntMax; // Corresponds to VScnt in gfx10/gfx11.
- unsigned SamplecntMax; // gfx12+ only.
- unsigned BvhcntMax; // gfx12+ only.
- unsigned KmcntMax; // gfx12+ only.
-};
-
-struct RegisterEncoding {
- unsigned VGPR0;
- unsigned VGPRL;
- unsigned SGPR0;
- unsigned SGPRL;
-};
-
-enum WaitEventType {
- VMEM_ACCESS, // vector-memory read & write
- VMEM_READ_ACCESS, // vector-memory read
- VMEM_SAMPLER_READ_ACCESS, // vector-memory SAMPLER read (gfx12+ only)
- VMEM_BVH_READ_ACCESS, // vector-memory BVH read (gfx12+ only)
- VMEM_WRITE_ACCESS, // vector-memory write that is not scratch
- SCRATCH_WRITE_ACCESS, // vector-memory write that may be scratch
- LDS_ACCESS, // lds read & write
- GDS_ACCESS, // gds read & write
- SQ_MESSAGE, // send message
- SMEM_ACCESS, // scalar-memory read & write
- EXP_GPR_LOCK, // export holding on its data src
- GDS_GPR_LOCK, // GDS holding on its data and addr src
- EXP_POS_ACCESS, // write to export position
- EXP_PARAM_ACCESS, // write to export parameter
- VMW_GPR_LOCK, // vector-memory write holding on its data src
- EXP_LDS_ACCESS, // read by ldsdir counting as export
- NUM_WAIT_EVENTS,
-};
-
-// The mapping is:
-// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
-// SQ_MAX_PGM_VGPRS .. NUM_ALL_VGPRS-1 extra VGPR-like slots
-// NUM_ALL_VGPRS .. NUM_ALL_VGPRS+SQ_MAX_PGM_SGPRS-1 real SGPRs
-// We reserve a fixed number of VGPR slots in the scoring tables for
-// special tokens like SCMEM_LDS (needed for buffer load to LDS).
-enum RegisterMapping {
- SQ_MAX_PGM_VGPRS = 512, // Maximum programmable VGPRs across all targets.
- AGPR_OFFSET = 256, // Maximum programmable ArchVGPRs across all targets.
- SQ_MAX_PGM_SGPRS = 256, // Maximum programmable SGPRs across all targets.
- NUM_EXTRA_VGPRS = 9, // Reserved slots for DS.
- // Artificial register slots to track LDS writes into specific LDS locations
- // if a location is known. When slots are exhausted or location is
- // unknown use the first slot. The first slot is also always updated in
- // addition to known location's slot to properly generate waits if dependent
- // instruction's location is unknown.
- EXTRA_VGPR_LDS = 0,
- NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts.
-};
-
-// Enumerate different types of result-returning VMEM operations. Although
-// s_waitcnt orders them all with a single vmcnt counter, in the absence of
-// s_waitcnt only instructions of the same VmemType are guaranteed to write
-// their results in order -- so there is no need to insert an s_waitcnt between
-// two instructions of the same type that write the same vgpr.
-enum VmemType {
- // BUF instructions and MIMG instructions without a sampler.
- VMEM_NOSAMPLER,
- // MIMG instructions with a sampler.
- VMEM_SAMPLER,
- // BVH instructions
- VMEM_BVH,
- NUM_VMEM_TYPES
-};
-
-// Maps values of InstCounterType to the instruction that waits on that
-// counter. Only used if GCNSubtarget::hasExtendedWaitCounts()
-// returns true.
-static const unsigned instrsForExtendedCounterTypes[NUM_EXTENDED_INST_CNTS] = {
- AMDGPU::S_WAIT_LOADCNT, AMDGPU::S_WAIT_DSCNT, AMDGPU::S_WAIT_EXPCNT,
- AMDGPU::S_WAIT_STORECNT, AMDGPU::S_WAIT_SAMPLECNT, AMDGPU::S_WAIT_BVHCNT,
- AMDGPU::S_WAIT_KMCNT};
-
-static bool updateVMCntOnly(const MachineInstr &Inst) {
- return SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLATGlobal(Inst) ||
- SIInstrInfo::isFLATScratch(Inst);
-}
-
-#ifndef NDEBUG
-static bool isNormalMode(InstCounterType MaxCounter) {
- return MaxCounter == NUM_NORMAL_INST_CNTS;
-}
-#endif // NDEBUG
-
-VmemType getVmemType(const MachineInstr &Inst) {
- assert(updateVMCntOnly(Inst));
- if (!SIInstrInfo::isMIMG(Inst) && !SIInstrInfo::isVIMAGE(Inst) &&
- !SIInstrInfo::isVSAMPLE(Inst))
- return VMEM_NOSAMPLER;
- const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode());
- const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
- AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
- return BaseInfo->BVH ? VMEM_BVH
- : BaseInfo->Sampler ? VMEM_SAMPLER : VMEM_NOSAMPLER;
-}
-
-unsigned &getCounterRef(AMDGPU::Waitcnt &Wait, InstCounterType T) {
- switch (T) {
- case LOAD_CNT:
- return Wait.LoadCnt;
- case EXP_CNT:
- return Wait.ExpCnt;
- case DS_CNT:
- return Wait.DsCnt;
- case STORE_CNT:
- return Wait.StoreCnt;
- case SAMPLE_CNT:
- return Wait.SampleCnt;
- case BVH_CNT:
- return Wait.BvhCnt;
- case KM_CNT:
- return Wait.KmCnt;
- default:
- llvm_unreachable("bad InstCounterType");
- }
-}
-
-void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
- unsigned &WC = getCounterRef(Wait, T);
- WC = std::min(WC, Count);
-}
-
-void setNoWait(AMDGPU::Waitcnt &Wait, InstCounterType T) {
- getCounterRef(Wait, T) = ~0u;
-}
-
-unsigned getWait(AMDGPU::Waitcnt &Wait, InstCounterType T) {
- return getCounterRef(Wait, T);
-}
-
-// Mapping from event to counter according to the table masks.
-InstCounterType eventCounter(const unsigned *masks, WaitEventType E) {
- for (auto T : inst_counter_types()) {
- if (masks[T] & (1 << E))
- return T;
- }
- llvm_unreachable("event type has no associated counter");
-}
+//===----------------------------------------------------------------------===//
+// SIWaitCntsInserter helper class interface.
+//===----------------------------------------------------------------------===//
-// This objects maintains the current score brackets of each wait counter, and
-// a per-register scoreboard for each wait counter.
-//
-// We also maintain the latest score for every event type that can change the
-// waitcnt in order to know if there are multiple types of events within
-// the brackets. When multiple types of event happen in the bracket,
-// wait count may get decreased out of order, therefore we need to put in
-// "s_waitcnt 0" before use.
-class WaitcntBrackets {
+class SIWaitCntsInserter : public AMDGPUWaitCntInserter {
public:
- WaitcntBrackets(const GCNSubtarget *SubTarget, InstCounterType MaxCounter,
- HardwareLimits Limits, RegisterEncoding Encoding,
- const unsigned *WaitEventMaskForInst,
- InstCounterType SmemAccessCounter)
- : ST(SubTarget), MaxCounter(MaxCounter), Limits(Limits),
- Encoding(Encoding), WaitEventMaskForInst(WaitEventMaskForInst),
- SmemAccessCounter(SmemAccessCounter) {}
-
- unsigned getWaitCountMax(InstCounterType T) const {
- switch (T) {
- case LOAD_CNT:
- return Limits.LoadcntMax;
- case DS_CNT:
- return Limits.DscntMax;
- case EXP_CNT:
- return Limits.ExpcntMax;
- case STORE_CNT:
- return Limits.StorecntMax;
- case SAMPLE_CNT:
- return Limits.SamplecntMax;
- case BVH_CNT:
- return Limits.BvhcntMax;
- case KM_CNT:
- return Limits.KmcntMax;
- default:
- break;
- }
- return 0;
- }
-
- unsigned getScoreLB(InstCounterType T) const {
- assert(T < NUM_INST_CNTS);
- return ScoreLBs[T];
- }
-
- unsigned getScoreUB(InstCounterType T) const {
- assert(T < NUM_INST_CNTS);
- return ScoreUBs[T];
- }
-
- unsigned getScoreRange(InstCounterType T) const {
- return getScoreUB(T) - getScoreLB(T);
- }
-
- unsigned getRegScore(int GprNo, InstCounterType T) const {
- if (GprNo < NUM_ALL_VGPRS) {
- return VgprScores[T][GprNo];
- }
- assert(T == SmemAccessCounter);
- return SgprScores[GprNo - NUM_ALL_VGPRS];
- }
-
- bool merge(const WaitcntBrackets &Other);
-
- RegInterval getRegInterval(const MachineInstr *MI,
- const MachineRegisterInfo *MRI,
- const SIRegisterInfo *TRI, unsigned OpNo) const;
-
- bool counterOutOfOrder(InstCounterType T) const;
- void simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
- void simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
- void determineWait(InstCounterType T, int RegNo, AMDGPU::Waitcnt &Wait) const;
- void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
- void applyWaitcnt(InstCounterType T, unsigned Count);
- void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
- const MachineRegisterInfo *MRI, WaitEventType E,
- MachineInstr &MI);
-
- unsigned hasPendingEvent() const { return PendingEvents; }
- unsigned hasPendingEvent(WaitEventType E) const {
- return PendingEvents & (1 << E);
- }
- unsigned hasPendingEvent(InstCounterType T) const {
- unsigned HasPending = PendingEvents & WaitEventMaskForInst[T];
- assert((HasPending != 0) == (getScoreRange(T) != 0));
- return HasPending;
- }
-
- bool hasMixedPendingEvents(InstCounterType T) const {
- unsigned Events = hasPendingEvent(T);
- // Return true if more than one bit is set in Events.
- return Events & (Events - 1);
- }
-
- bool hasPendingFlat() const {
- return ((LastFlat[DS_CNT] > ScoreLBs[DS_CNT] &&
- LastFlat[DS_CNT] <= ScoreUBs[DS_CNT]) ||
- (LastFlat[LOAD_CNT] > ScoreLBs[LOAD_CNT] &&
- LastFlat[LOAD_CNT] <= ScoreUBs[LOAD_CNT]));
- }
-
- void setPendingFlat() {
- LastFlat[LOAD_CNT] = ScoreUBs[LOAD_CNT];
- LastFlat[DS_CNT] = ScoreUBs[DS_CNT];
- }
-
- // Return true if there might be pending writes to the specified vgpr by VMEM
- // instructions with types different from V.
- bool hasOtherPendingVmemTypes(int GprNo, VmemType V) const {
- assert(GprNo < NUM_ALL_VGPRS);
- return VgprVmemTypes[GprNo] & ~(1 << V);
- }
-
- void clearVgprVmemTypes(int GprNo) {
- assert(GprNo < NUM_ALL_VGPRS);
- VgprVmemTypes[GprNo] = 0;
- }
-
- void setStateOnFunctionEntryOrReturn() {
- setScoreUB(STORE_CNT, getScoreUB(STORE_CNT) + getWaitCountMax(STORE_CNT));
- PendingEvents |= WaitEventMaskForInst[STORE_CNT];
- }
-
- ArrayRef<const MachineInstr *> getLDSDMAStores() const {
- return LDSDMAStores;
+ SIWaitCntsInserter() {}
+ SIWaitCntsInserter(const GCNSubtarget *ST, const MachineRegisterInfo *MRI,
+ WaitCntGenerator *WCG, InstCounterType MC, bool FEZWC,
+ MachineLoopInfo *MLI, MachinePostDominatorTree *PDT,
+ AliasAnalysis *AA)
+ : AMDGPUWaitCntInserter(ST, MRI, WCG, MC), MLI(MLI), PDT(PDT), AA(AA),
+ ForceEmitZeroWaitcnts(FEZWC) {
+ for (auto T : inst_counter_types())
+ ForceEmitWaitcnt[T] = false;
}
-
- void print(raw_ostream &);
- void dump() { print(dbgs()); }
+ bool generateWaitcntInstBefore(MachineInstr &MI,
+ WaitcntBrackets &ScoreBrackets,
+ MachineInstr *OldWaitcntInstr, bool FlushVmCnt,
+ VGPRInstsSet *VGPRInsts) override;
+ bool insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block,
+ WaitcntBrackets &ScoreBrackets,
+ VGPRInstsSet *VGPRInsts = nullptr) override;
+ void updateEventWaitcntAfter(MachineInstr &Inst,
+ WaitcntBrackets *ScoreBrackets) override;
private:
- struct MergeInfo {
- unsigned OldLB;
- unsigned OtherLB;
- unsigned MyShift;
- unsigned OtherShift;
- };
- static bool mergeScore(const MergeInfo &M, unsigned &Score,
- unsigned OtherScore);
-
- void setScoreLB(InstCounterType T, unsigned Val) {
- assert(T < NUM_INST_CNTS);
- ScoreLBs[T] = Val;
- }
-
- void setScoreUB(InstCounterType T, unsigned Val) {
- assert(T < NUM_INST_CNTS);
- ScoreUBs[T] = Val;
-
- if (T != EXP_CNT)
- return;
-
- if (getScoreRange(EXP_CNT) > getWaitCountMax(EXP_CNT))
- ScoreLBs[EXP_CNT] = ScoreUBs[EXP_CNT] - getWaitCountMax(EXP_CNT);
- }
-
- void setRegScore(int GprNo, InstCounterType T, unsigned Val) {
- if (GprNo < NUM_ALL_VGPRS) {
- VgprUB = std::max(VgprUB, GprNo);
- VgprScores[T][GprNo] = Val;
- } else {
- assert(T == SmemAccessCounter);
- SgprUB = std::max(SgprUB, GprNo - NUM_ALL_VGPRS);
- SgprScores[GprNo - NUM_ALL_VGPRS] = Val;
- }
- }
-
- void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
- const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
- unsigned OpNo, unsigned Val);
-
- const GCNSubtarget *ST = nullptr;
- InstCounterType MaxCounter = NUM_EXTENDED_INST_CNTS;
- HardwareLimits Limits = {};
- RegisterEncoding Encoding = {};
- const unsigned *WaitEventMaskForInst;
- InstCounterType SmemAccessCounter;
- unsigned ScoreLBs[NUM_INST_CNTS] = {0};
- unsigned ScoreUBs[NUM_INST_CNTS] = {0};
- unsigned PendingEvents = 0;
- // Remember the last flat memory operation.
- unsigned LastFlat[NUM_INST_CNTS] = {0};
- // wait_cnt scores for every vgpr.
- // Keep track of the VgprUB and SgprUB to make merge at join efficient.
- int VgprUB = -1;
- int SgprUB = -1;
- unsigned VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS] = {{0}};
- // Wait cnt scores for every sgpr, only DS_CNT (corresponding to LGKMcnt
- // pre-gfx12) or KM_CNT (gfx12+ only) are relevant.
- unsigned SgprScores[SQ_MAX_PGM_SGPRS] = {0};
- // Bitmask of the VmemTypes of VMEM instructions that might have a pending
- // write to each vgpr.
- unsigned char VgprVmemTypes[NUM_ALL_VGPRS] = {0};
- // Store representative LDS DMA operations. The only useful info here is
- // alias info. One store is kept per unique AAInfo.
- SmallVector<const MachineInstr *, NUM_EXTRA_VGPRS - 1> LDSDMAStores;
-};
-
-// This abstracts the logic for generating and updating S_WAIT* instructions
-// away from the analysis that determines where they are needed. This was
-// done because the set of counters and instructions for waiting on them
-// underwent a major shift with gfx12, sufficiently so that having this
-// abstraction allows the main analysis logic to be simpler than it would
-// otherwise have had to become.
-class WaitcntGenerator {
-protected:
- const GCNSubtarget *ST = nullptr;
- const SIInstrInfo *TII = nullptr;
- AMDGPU::IsaVersion IV;
- InstCounterType MaxCounter;
-
-public:
- WaitcntGenerator() {}
- WaitcntGenerator(const GCNSubtarget *ST, InstCounterType MaxCounter)
- : ST(ST), TII(ST->getInstrInfo()),
- IV(AMDGPU::getIsaVersion(ST->getCPU())), MaxCounter(MaxCounter) {}
-
- // Edits an existing sequence of wait count instructions according
- // to an incoming Waitcnt value, which is itself updated to reflect
- // any new wait count instructions which may need to be generated by
- // WaitcntGenerator::createNewWaitcnt(). It will return true if any edits
- // were made.
- //
- // This editing will usually be merely updated operands, but it may also
- // delete instructions if the incoming Wait value indicates they are not
- // needed. It may also remove existing instructions for which a wait
- // is needed if it can be determined that it is better to generate new
- // instructions later, as can happen on gfx12.
- virtual bool
- applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
- MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait,
- MachineBasicBlock::instr_iterator It) const = 0;
-
- // Transform a soft waitcnt into a normal one.
- bool promoteSoftWaitCnt(MachineInstr *Waitcnt) const;
-
- // Generates new wait count instructions according to the value of
- // Wait, returning true if any new instructions were created.
- virtual bool createNewWaitcnt(MachineBasicBlock &Block,
- MachineBasicBlock::instr_iterator It,
- AMDGPU::Waitcnt Wait) = 0;
-
- // Returns an array of bit masks which can be used to map values in
- // WaitEventType to corresponding counter values in InstCounterType.
- virtual const unsigned *getWaitEventMask() const = 0;
-
- // Returns a new waitcnt with all counters except VScnt set to 0. If
- // IncludeVSCnt is true, VScnt is set to 0, otherwise it is set to ~0u.
- virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const = 0;
-
- virtual ~WaitcntGenerator() = default;
-};
-
-class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
-public:
- WaitcntGeneratorPreGFX12() {}
- WaitcntGeneratorPreGFX12(const GCNSubtarget *ST)
- : WaitcntGenerator(ST, NUM_NORMAL_INST_CNTS) {}
-
- bool
- applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
- MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait,
- MachineBasicBlock::instr_iterator It) const override;
-
- bool createNewWaitcnt(MachineBasicBlock &Block,
- MachineBasicBlock::instr_iterator It,
- AMDGPU::Waitcnt Wait) override;
-
- const unsigned *getWaitEventMask() const override {
- assert(ST);
-
- static const unsigned WaitEventMaskForInstPreGFX12[NUM_INST_CNTS] = {
- (1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS) |
- (1 << VMEM_SAMPLER_READ_ACCESS) | (1 << VMEM_BVH_READ_ACCESS),
- (1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) |
- (1 << SQ_MESSAGE),
- (1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
- (1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS) |
- (1 << EXP_LDS_ACCESS),
- (1 << VMEM_WRITE_ACCESS) | (1 << SCRATCH_WRITE_ACCESS),
- 0,
- 0,
- 0};
-
- return WaitEventMaskForInstPreGFX12;
- }
-
- virtual AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
-};
-
-class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
-public:
- WaitcntGeneratorGFX12Plus() {}
- WaitcntGeneratorGFX12Plus(const GCNSubtarget *ST, InstCounterType MaxCounter)
- : WaitcntGenerator(ST, MaxCounter) {}
-
- bool
- applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
- MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait,
- MachineBasicBlock::instr_iterator It) const override;
-
- bool createNewWaitcnt(MachineBasicBlock &Bl...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/83018
More information about the llvm-commits
mailing list