[llvm] [AMDGPU] Classify FLAT instructions as VMEM (PR #137148)
Robert Imschweiler via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 24 03:06:19 PDT 2025
https://github.com/ro-i updated https://github.com/llvm/llvm-project/pull/137148
From 15c1d84dc6bf4aa9a3f1807335295b8ea8c53ba1 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Thu, 24 Apr 2025 05:02:55 -0500
Subject: [PATCH] [AMDGPU] Classify FLAT instructions as VMEM
Also adapt hazard and wait handling.
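For review context: the core change is that SIInstrInfo::isVMEM() now covers
FLAT, so call sites no longer need to spell out the FLAT special case. A
minimal standalone sketch of the before/after predicate, using illustrative
flag values rather than the real SIDefines.h encodings:

#include <cassert>
#include <cstdint>

// Illustrative TSFlags-style property bits (not the real encodings).
enum : uint64_t {
  MUBUF = 1ull << 0,
  MTBUF = 1ull << 1,
  MIMG = 1ull << 2,
  FLAT = 1ull << 3,
};

// Before: FLAT was not VMEM, so callers wrote
// "isVMEM(MI) || (isFLAT(MI) && !isDS(MI))" to cover flat memory traffic.
static bool isVMEMOld(uint64_t TSFlags) {
  return (TSFlags & (MUBUF | MTBUF | MIMG)) != 0;
}

// After: FLAT is VMEM, and those call sites collapse to plain isVMEM(MI).
static bool isVMEMNew(uint64_t TSFlags) {
  return (TSFlags & (MUBUF | MTBUF | MIMG | FLAT)) != 0;
}

int main() {
  assert(!isVMEMOld(FLAT) && isVMEMNew(FLAT));  // FLAT is reclassified
  assert(isVMEMOld(MUBUF) && isVMEMNew(MUBUF)); // buffer ops are unchanged
  return 0;
}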
---
llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 12 ++---
.../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 3 +-
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 49 ++++++++-----------
.../AMDGPU/MCA/AMDGPUCustomBehaviour.cpp | 3 +-
.../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 2 +-
.../lib/Target/AMDGPU/SIInsertHardClauses.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 14 +++---
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 4 +-
llvm/test/CodeGen/AMDGPU/hard-clauses.mir | 21 +++++---
.../CodeGen/AMDGPU/lds-branch-vmem-hazard.mir | 7 ++-
10 files changed, 63 insertions(+), 56 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 87c1d2586cce5..c0731f6bbdd32 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -2420,29 +2420,29 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
Result = true;
else if (((SGMask & SchedGroupMask::VMEM) != SchedGroupMask::NONE) &&
- (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
+ TII->isVMEM(MI))
Result = true;
else if (((SGMask & SchedGroupMask::VMEM_READ) != SchedGroupMask::NONE) &&
MI.mayLoad() &&
- (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
+ TII->isVMEM(MI))
Result = true;
else if (((SGMask & SchedGroupMask::VMEM_WRITE) != SchedGroupMask::NONE) &&
MI.mayStore() &&
- (TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
+ TII->isVMEM(MI))
Result = true;
else if (((SGMask & SchedGroupMask::DS) != SchedGroupMask::NONE) &&
- TII->isDS(MI))
+ (TII->isDS(MI) || TII->isLDSDMA(MI)))
Result = true;
else if (((SGMask & SchedGroupMask::DS_READ) != SchedGroupMask::NONE) &&
- MI.mayLoad() && TII->isDS(MI))
+ MI.mayLoad() && (TII->isDS(MI) || TII->isLDSDMA(MI)))
Result = true;
else if (((SGMask & SchedGroupMask::DS_WRITE) != SchedGroupMask::NONE) &&
- MI.mayStore() && TII->isDS(MI))
+ MI.mayStore() && (TII->isDS(MI) || TII->isLDSDMA(MI)))
Result = true;
else if (((SGMask & SchedGroupMask::TRANS) != SchedGroupMask::NONE) &&
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index bfdd8cf1bc2b1..a5e5f2912cbdd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -232,7 +232,8 @@ class AMDGPUWaitSGPRHazards {
State.ActiveFlat = true;
// SMEM or VMEM clears hazards
- if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSMRD(*MI)) {
+ // FIXME: Should this also include FLAT instructions without a VALU
+ // component (i.e., !isLDSDMA())?
+ if ((SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI)) ||
+     SIInstrInfo::isSMRD(*MI)) {
State.VCCHazard = HazardState::None;
State.SALUHazards.reset();
State.VALUHazards.reset();
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index aaefe27b1324f..50d518e45acf0 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -183,10 +183,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (ST.hasNoDataDepHazard())
return NoHazard;
- // FIXME: Should flat be considered vmem?
- if ((SIInstrInfo::isVMEM(*MI) ||
- SIInstrInfo::isFLAT(*MI))
- && checkVMEMHazards(MI) > 0)
+ if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
return HazardType;
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
@@ -202,8 +199,8 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
return HazardType;
if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
- SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
- SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
+ SIInstrInfo::isDS(*MI) || SIInstrInfo::isEXP(*MI)) &&
+ checkMAIVALUHazards(MI) > 0)
return HazardType;
if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
@@ -230,7 +227,6 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
return HazardType;
if ((SIInstrInfo::isVMEM(*MI) ||
- SIInstrInfo::isFLAT(*MI) ||
SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
return HazardType;
@@ -324,7 +320,7 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (ST.hasNoDataDepHazard())
return WaitStates;
- if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
+ if (SIInstrInfo::isVMEM(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
if (SIInstrInfo::isVALU(*MI))
@@ -340,8 +336,8 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
- SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
- SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
+ SIInstrInfo::isDS(*MI) || SIInstrInfo::isEXP(*MI)) &&
+ checkMAIVALUHazards(MI) > 0)
WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI));
if (MI->isInlineAsm())
@@ -370,7 +366,6 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
return std::max(WaitStates, checkMAIHazards(MI));
if (SIInstrInfo::isVMEM(*MI) ||
- SIInstrInfo::isFLAT(*MI) ||
SIInstrInfo::isDS(*MI))
return std::max(WaitStates, checkMAILdStHazards(MI));
@@ -598,7 +593,7 @@ static bool breaksSMEMSoftClause(MachineInstr *MI) {
}
static bool breaksVMEMSoftClause(MachineInstr *MI) {
- return !SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI);
+ return !SIInstrInfo::isVMEM(*MI);
}
int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
@@ -1250,8 +1245,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
const SIRegisterInfo *TRI = ST.getRegisterInfo();
auto IsHazardFn = [TRI, MI](const MachineInstr &I) {
- if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) &&
- !SIInstrInfo::isFLAT(I))
+ if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I))
return false;
for (const MachineOperand &Def : MI->defs()) {
@@ -1424,9 +1418,8 @@ static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
bool HasVmem = false;
for (auto &MBB : MF) {
for (auto &MI : MBB) {
- HasLds |= SIInstrInfo::isDS(MI);
- HasVmem |=
- SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI);
+ HasLds |= SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI);
+ HasVmem |= SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI);
if (HasLds && HasVmem)
return true;
}
@@ -1448,9 +1441,9 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
assert(!ST.hasExtendedWaitCounts());
auto IsHazardInst = [](const MachineInstr &MI) {
- if (SIInstrInfo::isDS(MI))
+ if (SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI))
return 1;
- if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
+ if (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI))
return 2;
return 0;
};
@@ -1517,8 +1510,8 @@ bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
if (WaitStates >= NoHazardWaitStates)
return true;
// Instructions which cause va_vdst==0 expire hazard
- return SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
- SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I);
+ return SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
+ SIInstrInfo::isEXP(I);
};
auto GetWaitStatesFn = [](const MachineInstr &MI) {
return SIInstrInfo::isVALU(MI) ? 1 : 0;
@@ -1549,8 +1542,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
const Register VDSTReg = VDST->getReg();
auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) {
- if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) &&
- !SIInstrInfo::isDS(I))
+ if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I))
return false;
return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
};
@@ -1635,8 +1627,8 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
return HazardExpired;
// Instructions which cause va_vdst==0 expire hazard
- if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
- SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
+ if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
+ SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
return HazardExpired;
@@ -1772,8 +1764,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
return HazardExpired;
// Instructions which cause va_vdst==0 expire hazard
- if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
- SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
+ if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
+ SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
I.getOperand(0).getImm() == 0x0fff))
return HazardExpired;
@@ -2003,7 +1995,7 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
return 0;
auto IsHazardFn = [](const MachineInstr &I) {
- if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I))
+ if (!SIInstrInfo::isVMEM(I))
return false;
return SIInstrInfo::isFPAtomic(I);
};
@@ -2626,7 +2618,6 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
int WaitStatesNeeded = 0;
bool IsMem = SIInstrInfo::isVMEM(*MI) ||
- SIInstrInfo::isFLAT(*MI) ||
SIInstrInfo::isDS(*MI);
bool IsMemOrExport = IsMem || SIInstrInfo::isEXP(*MI);
bool IsVALU = SIInstrInfo::isVALU(*MI);
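Note for reviewers: a compact model of the reworked LDS-branch-VMEM WAR
hazard classification. LDS DMA instructions are VMEM-encoded but act on the
LDS side of this hazard. Sketch only, with illustrative types rather than
the MachineInstr API:

// Illustrative per-instruction properties (not the real API).
struct InstrClass {
  bool IsDS;     // LDS instruction
  bool IsVMEM;   // VMEM, which now includes FLAT
  bool IsLDSDMA; // VMEM-encoded load/store that moves data to/from LDS
};

// Mirrors the updated IsHazardInst lambda in fixLdsBranchVmemWARHazard().
static int hazardSide(const InstrClass &I) {
  if (I.IsDS || I.IsLDSDMA)
    return 1; // counts as the LDS access
  if (I.IsVMEM && !I.IsLDSDMA)
    return 2; // counts as the VMEM access
  return 0;   // not involved in the hazard
}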
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
index 4802ed4bb53df..f00c95f71f467 100644
--- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
@@ -303,7 +303,8 @@ void AMDGPUCustomBehaviour::generateWaitCntInfo() {
bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
return MCID.TSFlags & SIInstrFlags::MUBUF ||
MCID.TSFlags & SIInstrFlags::MTBUF ||
- MCID.TSFlags & SIInstrFlags::MIMG;
+ MCID.TSFlags & SIInstrFlags::MIMG ||
+ MCID.TSFlags & SIInstrFlags::FLAT;
}
// taken from SIInstrInfo::hasModifiersSet()
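Side note, not a change made by this patch: the chained bit tests in
AMDGPUCustomBehaviour::isVMEM() could equivalently be folded into a single
mask test, assuming the SIInstrFlags enumerators from SIDefines.h:

#include "SIDefines.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

static bool isVMEMFolded(const MCInstrDesc &MCID) {
  // One mask test instead of four separate TSFlags tests.
  constexpr uint64_t VMEMMask = SIInstrFlags::MUBUF | SIInstrFlags::MTBUF |
                                SIInstrFlags::MIMG | SIInstrFlags::FLAT;
  return (MCID.TSFlags & VMEMMask) != 0;
}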
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index bbc0280aed42e..7524747833468 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -100,7 +100,7 @@ FunctionPass *llvm::createSIFormMemoryClausesLegacyPass() {
}
static bool isVMEMClauseInst(const MachineInstr &MI) {
- return SIInstrInfo::isFLAT(MI) || SIInstrInfo::isVMEM(MI);
+ return SIInstrInfo::isVMEM(MI);
}
static bool isSMEMClauseInst(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 88ff04d55629c..fd8023b3455c2 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -97,7 +97,7 @@ class SIInsertHardClauses {
HardClauseType getHardClauseType(const MachineInstr &MI) {
if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
- if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
+ if (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI)) {
if (ST->hasNSAClauseBug()) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
@@ -121,7 +121,7 @@ class SIInsertHardClauses {
: HARDCLAUSE_MIMG_LOAD
: HARDCLAUSE_MIMG_STORE;
}
- if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
+ if (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI)) {
return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
: HARDCLAUSE_VMEM_LOAD
: HARDCLAUSE_VMEM_STORE;
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 8848eebdeb6b3..7ccab028d0f1a 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -168,8 +168,8 @@ static const unsigned instrsForExtendedCounterTypes[NUM_EXTENDED_INST_CNTS] = {
AMDGPU::S_WAIT_KMCNT};
static bool updateVMCntOnly(const MachineInstr &Inst) {
- return SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLATGlobal(Inst) ||
- SIInstrInfo::isFLATScratch(Inst);
+ return (SIInstrInfo::isVMEM(Inst) && !SIInstrInfo::isFLAT(Inst)) ||
+ SIInstrInfo::isFLATGlobal(Inst) || SIInstrInfo::isFLATScratch(Inst);
}
#ifndef NDEBUG
@@ -695,14 +695,14 @@ class SIInsertWaitcnts {
#endif // NDEBUG
}
- // Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM or
- // FLAT instruction.
+ // Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
+ // instruction.
WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
// Maps VMEM access types to their corresponding WaitEventType.
static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
- assert(SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLAT(Inst));
+ assert(SIInstrInfo::isVMEM(Inst));
// LDS DMA loads are also stores, but on the LDS side. On the VMEM side
// these should use VM_CNT.
if (!ST->hasVscnt() || SIInstrInfo::mayWriteLDSThroughDMA(Inst))
@@ -2454,8 +2454,8 @@ bool SIInsertWaitcnts::isPreheaderToFlush(
}
bool SIInsertWaitcnts::isVMEMOrFlatVMEM(const MachineInstr &MI) const {
- return SIInstrInfo::isVMEM(MI) ||
- (SIInstrInfo::isFLAT(MI) && mayAccessVMEMThroughFlat(MI));
+ return (SIInstrInfo::isFLAT(MI) && mayAccessVMEMThroughFlat(MI)) ||
+ SIInstrInfo::isVMEM(MI);
}
// Return true if it is better to flush the vmcnt counter in the preheader of
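The updateVMCntOnly() tweak above is the subtle part of the waitcnt changes:
a plain FLAT access may resolve to LDS and then also counts against
LGKM_CNT, so it must be excluded even though it is now classified as VMEM,
while the segment-specific GLOBAL/SCRATCH encodings only touch VM_CNT. An
illustrative counter model (hypothetical helper, not LLVM API):

struct Counters {
  unsigned VmCnt = 0;
  unsigned LgkmCnt = 0;
};

static void issueAccess(Counters &C, bool MayAccessLDS) {
  ++C.VmCnt; // every VMEM-classified access counts against VM_CNT
  if (MayAccessLDS)
    ++C.LgkmCnt; // plain FLAT: the address may resolve to LDS
}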
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index a3a54659d299a..30de92ae83532 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -449,7 +449,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
}
static bool isVMEM(const MachineInstr &MI) {
- return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
+    assert((!isFLAT(MI) || usesVM_CNT(MI)) &&
+           "FLAT instructions classified as VMEM must use VM_CNT");
+ return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI);
}
bool isVMEM(uint16_t Opcode) const {
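With FLAT folded into isVMEM(), call sites that need the old, narrower
meaning (buffer/image VMEM only) can still express it explicitly, as several
hunks above do. A hypothetical helper illustrating the pattern, not part of
the patch:

#include "SIInstrInfo.h"

// "VMEM excluding FLAT" recovers the pre-patch isVMEM() semantics.
static bool isBufferOrImageVMEM(const llvm::MachineInstr &MI) {
  return llvm::SIInstrInfo::isVMEM(MI) && !llvm::SIInstrInfo::isFLAT(MI);
}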
diff --git a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
index 44b988a7121c7..bdb496f378548 100644
--- a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
+++ b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
@@ -630,20 +630,29 @@ body: |
; CHECK-LABEL: name: flat_global_load
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
- ; CHECK-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
+ ; CHECK-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
+ ; CHECK-NEXT: S_CLAUSE 1
+ ; CHECK-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; CHECK-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
+ ; CHECK-NEXT: }
;
; GFX11-LABEL: name: flat_global_load
; GFX11: liveins: $vgpr0_vgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
- ; GFX11-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
+ ; GFX11-NEXT: S_CLAUSE 1
+ ; GFX11-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
+ ; GFX11-NEXT: }
;
; GFX12-LABEL: name: flat_global_load
; GFX12: liveins: $vgpr0_vgpr1
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
- ; GFX12-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
+ ; GFX12-NEXT: S_CLAUSE 1
+ ; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
+ ; GFX12-NEXT: }
$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
...
diff --git a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
index 86e657093b5b2..245abf03811d0 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
@@ -269,11 +269,14 @@ body: |
S_ENDPGM 0
...
-# GCN-LABEL: name: no_hazard_lds_branch_flat
+# FLAT_* instructions are "based on per-thread address (VGPR), can load/store:
+# global memory, LDS or scratch memory" (RDNA4 ISA)
+# GCN-LABEL: name: hazard_lds_branch_flat
# GCN: bb.1:
+# GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: FLAT_LOAD_DWORD
---
-name: no_hazard_lds_branch_flat
+name: hazard_lds_branch_flat
body: |
bb.0:
successors: %bb.1