[llvm] e151434 - [AMDGPU][InsertWaitCnts][NFC] Merge VMEM_ACCESS and VMEM_READ_ACCESS into a single event type (#171973)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 04:56:44 PST 2025
Author: Pankaj Dwivedi
Date: 2025-12-12T18:26:40+05:30
New Revision: e151434b0f059f844a4e4e73812083f5a34e6524
URL: https://github.com/llvm/llvm-project/commit/e151434b0f059f844a4e4e73812083f5a34e6524
DIFF: https://github.com/llvm/llvm-project/commit/e151434b0f059f844a4e4e73812083f5a34e6524.diff
LOG: [AMDGPU][InsertWaitCnts][NFC] Merge VMEM_ACCESS and VMEM_READ_ACCESS into a single event type (#171973)
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 7c0525b9c9957..0a743dbc2e258 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -144,8 +144,7 @@ struct HardwareLimits {
};
#define AMDGPU_DECLARE_WAIT_EVENTS(DECL) \
- DECL(VMEM_ACCESS) /* vmem read & write */ \
- DECL(VMEM_READ_ACCESS) /* vmem read */ \
+ DECL(VMEM_ACCESS) /* vmem read & write (pre-gfx10), vmem read (gfx10+) */ \
DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
@@ -369,8 +368,8 @@ class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
assert(ST);
static const unsigned WaitEventMaskForInstPreGFX12[NUM_INST_CNTS] = {
- eventMask({VMEM_ACCESS, VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS,
- VMEM_BVH_READ_ACCESS}),
+ eventMask(
+ {VMEM_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS}),
eventMask({SMEM_ACCESS, LDS_ACCESS, GDS_ACCESS, SQ_MESSAGE}),
eventMask({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS,
EXP_POS_ACCESS, EXP_LDS_ACCESS}),
@@ -403,7 +402,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
assert(ST);
static const unsigned WaitEventMaskForInstGFX12Plus[NUM_INST_CNTS] = {
- eventMask({VMEM_ACCESS, VMEM_READ_ACCESS}),
+ eventMask({VMEM_ACCESS}),
eventMask({LDS_ACCESS, GDS_ACCESS}),
eventMask({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS,
EXP_POS_ACCESS, EXP_LDS_ACCESS}),
@@ -537,7 +536,7 @@ class SIInsertWaitcnts {
switch (Inst.getOpcode()) {
// FIXME: GLOBAL_INV needs to be tracked with xcnt too.
case AMDGPU::GLOBAL_INV:
- return VMEM_READ_ACCESS; // tracked using loadcnt
+ return VMEM_ACCESS; // tracked using loadcnt
case AMDGPU::GLOBAL_WB:
case AMDGPU::GLOBAL_WBINV:
return VMEM_WRITE_ACCESS; // tracked using storecnt
@@ -547,7 +546,7 @@ class SIInsertWaitcnts {
// Maps VMEM access types to their corresponding WaitEventType.
static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
- VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
+ VMEM_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
assert(SIInstrInfo::isVMEM(Inst));
// LDS DMA loads are also stores, but on the LDS side. On the VMEM side
@@ -561,7 +560,7 @@ class SIInsertWaitcnts {
return VMEM_WRITE_ACCESS;
}
if (!ST->hasExtendedWaitCounts() || SIInstrInfo::isFLAT(Inst))
- return VMEM_READ_ACCESS;
+ return VMEM_ACCESS;
return VmemReadMapping[getVmemType(Inst)];
}
More information about the llvm-commits mailing list