[llvm] [AMDGPU] SIMemoryLegalizer: Factor out check if memory operations can affect the global AS (PR #160129)
Fabian Ritter via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 23 02:54:54 PDT 2025
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/160129
>From a74e810b84577c80a5e8d9788e783381a63c6475 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Mon, 22 Sep 2025 11:20:16 -0400
Subject: [PATCH 1/2] [AMDGPU] SIMemoryLegalizer: Factor out check if memory
operations can affect the global AS
Mostly NFC, and adds an assertion for gfx12 to ensure that no atomic scratch
instructions are present in the case of GloballyAddressableScratch. This should
always hold because of #154710.
---
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 53 +++++++++++++-------
1 file changed, 35 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index c85d2bb9fe9ae..23133b28a4af8 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -299,6 +299,10 @@ class SICacheControl {
bool enableNamedBit(const MachineBasicBlock::iterator MI,
AMDGPU::CPol::CPol Bit) const;
+ /// Check if any atomic operation on AS can affect memory accessible via the
+ /// global address space.
+ virtual bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const = 0;
+
public:
/// Create a cache control for the subtarget \p ST.
@@ -403,6 +407,10 @@ class SIGfx6CacheControl : public SICacheControl {
return enableNamedBit(MI, AMDGPU::CPol::SLC);
}
+ bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const override {
+ return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
+ }
+
public:
SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}
@@ -609,6 +617,15 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
bool setAtomicScope(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;
+ bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const override {
+ assert((!ST.hasGloballyAddressableScratch() ||
+ ((AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) ||
+ (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
+ "scratch instructions should already be replaced by flat "
+ "instructions if GloballyAddressableScratch is enabled");
+ return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
+ }
+
public:
SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {
// GFX12.0 and GFX12.5 memory models greatly overlap, and in some cases
@@ -1016,7 +1033,7 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
assert(MI->mayLoad() && !MI->mayStore());
bool Changed = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -1239,7 +1256,7 @@ bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
if (Pos == Position::AFTER)
++MI;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -1299,7 +1316,7 @@ bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
if (Pos == Position::AFTER)
++MI;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -1336,7 +1353,7 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass(
assert(MI->mayLoad() && !MI->mayStore());
bool Changed = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -1378,7 +1395,7 @@ bool SIGfx90ACacheControl::enableRMWCacheBypass(
assert(MI->mayLoad() && MI->mayStore());
bool Changed = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -1487,7 +1504,7 @@ bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
if (Pos == Position::AFTER)
++MI;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
// Ensures that following loads will not see stale remote VMEM data or
@@ -1551,7 +1568,7 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
if (Pos == Position::AFTER)
++MI;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
// Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
@@ -1594,7 +1611,7 @@ bool SIGfx940CacheControl::enableLoadCacheBypass(
assert(MI->mayLoad() && !MI->mayStore());
bool Changed = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
// Set SC bits to indicate system scope.
@@ -1638,7 +1655,7 @@ bool SIGfx940CacheControl::enableStoreCacheBypass(
assert(!MI->mayLoad() && MI->mayStore());
bool Changed = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
// Set SC bits to indicate system scope.
@@ -1678,7 +1695,7 @@ bool SIGfx940CacheControl::enableRMWCacheBypass(
assert(MI->mayLoad() && MI->mayStore());
bool Changed = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
// Set SC1 bit to indicate system scope.
@@ -1756,7 +1773,7 @@ bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
if (Pos == Position::AFTER)
++MI;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
// Ensures that following loads will not see stale remote VMEM data or
@@ -1840,7 +1857,7 @@ bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
if (Pos == Position::AFTER)
++MI;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
// Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
@@ -1897,7 +1914,7 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
assert(MI->mayLoad() && !MI->mayStore());
bool Changed = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -2129,7 +2146,7 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
if (Pos == Position::AFTER)
++MI;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -2194,7 +2211,7 @@ bool SIGfx11CacheControl::enableLoadCacheBypass(
assert(MI->mayLoad() && !MI->mayStore());
bool Changed = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -2462,7 +2479,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
/// memory.
/// Other address spaces do not have a cache.
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
+ if (!canAffectGlobalAddrSpace(AddrSpace))
return false;
AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
@@ -2523,7 +2540,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
// writeback as all memory operations by the same thread are
// sequentially consistent, and no other thread can access scratch
// memory.
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
if (Pos == Position::AFTER)
++MI;
@@ -2655,7 +2672,7 @@ bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
SIAtomicAddrSpace AddrSpace) const {
bool Changed = false;
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (canAffectGlobalAddrSpace(AddrSpace)) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
>From 1ae7b416e5c5d0f32103c64eddae46b92a58493a Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Tue, 23 Sep 2025 03:05:45 -0400
Subject: [PATCH 2/2] Move the canAffectGlobalAddrSpace implementation to the
SICacheControl base class.
---
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 24 ++++++++------------
1 file changed, 10 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 23133b28a4af8..27dc4ead4c364 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -301,7 +301,7 @@ class SICacheControl {
/// Check if any atomic operation on AS can affect memory accessible via the
/// global address space.
- virtual bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const = 0;
+ bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const;
public:
@@ -407,10 +407,6 @@ class SIGfx6CacheControl : public SICacheControl {
return enableNamedBit(MI, AMDGPU::CPol::SLC);
}
- bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const override {
- return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
- }
-
public:
SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}
@@ -617,15 +613,6 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
bool setAtomicScope(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;
- bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const override {
- assert((!ST.hasGloballyAddressableScratch() ||
- ((AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) ||
- (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
- "scratch instructions should already be replaced by flat "
- "instructions if GloballyAddressableScratch is enabled");
- return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
- }
-
public:
SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {
// GFX12.0 and GFX12.5 memory models greatly overlap, and in some cases
@@ -1008,6 +995,15 @@ bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
return true;
}
+bool SICacheControl::canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const {
+ assert((!ST.hasGloballyAddressableScratch() ||
+ (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
+ (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
+ "scratch instructions should already be replaced by flat "
+ "instructions if GloballyAddressableScratch is enabled");
+ return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
+}
+
/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();
More information about the llvm-commits
mailing list