[llvm] 907d799 - [AMDGPU] Cleanup memory legalizer interfaces
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 13 23:27:54 PDT 2020
Author: Tony
Date: 2020-10-14T06:07:51Z
New Revision: 907d799070c351b3da0b0f4b9669f230d46e6d8d
URL: https://github.com/llvm/llvm-project/commit/907d799070c351b3da0b0f4b9669f230d46e6d8d
DIFF: https://github.com/llvm/llvm-project/commit/907d799070c351b3da0b0f4b9669f230d46e6d8d.diff
LOG: [AMDGPU] Cleanup memory legalizer interfaces
- Rename interfaces to be in terms of acquire and release.
- Improve comments.
Differential Revision: https://reviews.llvm.org/D89355
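For readers skimming the diff, here is a minimal, self-contained C++ sketch (not the actual LLVM code) of what the renamed hooks mean and how a GFX6-style target can implement a release as a plain wait, as the new default in the patch does. The enum names echo the ones in the diff, but the simplified signatures, Gfx6LikeCacheControl, and the main() driver are illustrative stand-ins for the real SICacheControl interfaces.

#include <cstdio>

enum class SIAtomicScope { NONE, SINGLETHREAD, WAVEFRONT, WORKGROUP, AGENT, SYSTEM };
enum class SIMemOp : unsigned { NONE = 0, LOAD = 1u << 0, STORE = 1u << 1 };
enum class Position { BEFORE, AFTER };

inline SIMemOp operator|(SIMemOp A, SIMemOp B) {
  return SIMemOp(unsigned(A) | unsigned(B));
}

struct CacheControl {
  // Make prior memory operations of other threads visible to this thread.
  virtual bool insertAcquire(SIAtomicScope Scope, Position Pos) const = 0;
  // Make this thread's prior memory operations visible to other threads.
  virtual bool insertRelease(SIAtomicScope Scope, bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;
  // Wait for outstanding memory operations of the given kinds to complete.
  virtual bool insertWait(SIAtomicScope Scope, SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering, Position Pos) const = 0;
  virtual ~CacheControl() = default;
};

struct Gfx6LikeCacheControl : CacheControl {
  bool insertAcquire(SIAtomicScope, Position) const override {
    std::puts("  <invalidate L1>");   // e.g. a BUFFER_WBINVL1-style invalidate
    return true;
  }
  bool insertRelease(SIAtomicScope Scope, bool CrossAS, Position Pos) const override {
    // Mirrors the new default in the diff: a release is simply a wait for all
    // prior loads and stores to complete.
    return insertWait(Scope, SIMemOp::LOAD | SIMemOp::STORE, CrossAS, Pos);
  }
  bool insertWait(SIAtomicScope, SIMemOp, bool, Position) const override {
    std::puts("  <s_waitcnt>");
    return true;
  }
};

int main() {
  Gfx6LikeCacheControl CC;
  std::puts("release store expands to (before the store):");
  CC.insertRelease(SIAtomicScope::AGENT, /*CrossAS=*/false, Position::BEFORE);
  std::puts("acquire load expands to (after the load):");
  CC.insertWait(SIAtomicScope::AGENT, SIMemOp::LOAD, false, Position::AFTER);
  CC.insertAcquire(SIAtomicScope::AGENT, Position::AFTER);
}
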
Added:
Modified:
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 21419aab1a43..c03709194917 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -280,23 +280,23 @@ class SICacheControl {
virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
const = 0;
- /// Inserts any necessary instructions at position \p Pos relative
- /// to instruction \p MI to ensure any caches associated with
- /// address spaces \p AddrSpace for memory scopes up to memory scope
- /// \p Scope are invalidated. Returns true iff any instructions
- /// inserted.
- virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const = 0;
+ /// Inserts any necessary instructions at position \p Pos relative to
+ /// instruction \p MI to ensure any subsequent memory instructions of this
+ /// thread with address spaces \p AddrSpace will observe the previous memory
+ /// operations by any thread for memory scopes up to memory scope \p Scope .
+ /// Returns true iff any instructions inserted.
+ virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const = 0;
/// Inserts any necessary instructions at position \p Pos relative
- /// to instruction \p MI to ensure memory instructions of kind \p Op
- /// associated with address spaces \p AddrSpace have completed as
- /// observed by other memory instructions executing in memory scope
- /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
- /// ordering is between address spaces. Returns true iff any
- /// instructions inserted.
+ /// to instruction \p MI to ensure memory instructions before \p Pos of kind
+ /// \p Op associated with address spaces \p AddrSpace have completed. Used
+ /// between memory instructions to enforce the order they become visible as
+ /// observed by other memory instructions executing in memory scope \p Scope.
+ /// \p IsCrossAddrSpaceOrdering indicates if the memory ordering is between
+ /// address spaces. Returns true iff any instructions inserted.
virtual bool insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
@@ -304,6 +304,18 @@ class SICacheControl {
bool IsCrossAddrSpaceOrdering,
Position Pos) const = 0;
+ /// Inserts any necessary instructions at position \p Pos relative to
+ /// instruction \p MI to ensure previous memory instructions by this thread
+ /// with address spaces \p AddrSpace have completed and can be observed by
+ /// subsequent memory instructions by any thread executing in memory scope \p
+ /// Scope. \p IsCrossAddrSpaceOrdering indicates if the memory ordering is
+ /// between address spaces. Returns true iff any instructions inserted.
+ virtual bool insertRelease(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const = 0;
+
/// Virtual destructor to allow derivations to be deleted.
virtual ~SICacheControl() = default;
@@ -334,10 +346,16 @@ class SIGfx6CacheControl : public SICacheControl {
bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
- bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const override;
+ bool insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const override;
+
+ bool insertRelease(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const override;
bool insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -352,10 +370,10 @@ class SIGfx7CacheControl : public SIGfx6CacheControl {
SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
- bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const override;
+ bool insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const override;
};
@@ -380,10 +398,10 @@ class SIGfx10CacheControl : public SIGfx7CacheControl {
bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
- bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const override;
+ bool insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const override;
bool insertWait(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -678,9 +696,6 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
bool Changed = false;
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
- /// TODO: Do not set glc for rmw atomic operations as they
- /// implicitly bypass the L1 cache.
-
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
@@ -718,10 +733,10 @@ bool SIGfx6CacheControl::enableNonTemporal(
return Changed;
}
-bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const {
+bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
if (!InsertCacheInv)
return false;
@@ -802,12 +817,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
case SIAtomicScope::WORKGROUP:
- // If no cross address space ordering then an LDS waitcnt is not
- // needed as LDS operations for all waves are executed in a
- // total global ordering as observed by all waves. Required if
- // also synchronizing with global/GDS memory as LDS operations
- // could be reordered with respect to later global/GDS memory
- // operations of the same wave.
+ // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
+ // not needed as LDS operations for all waves are executed in a total
+ // global ordering as observed by all waves. Required if also
+ // synchronizing with global/GDS memory as LDS operations could be
+ // reordered with respect to later global/GDS memory operations of the
+ // same wave.
LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WAVEFRONT:
@@ -824,12 +839,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- // If no cross address space ordering then an GDS waitcnt is not
- // needed as GDS operations for all waves are executed in a
- // total global ordering as observed by all waves. Required if
- // also synchronizing with global/LDS memory as GDS operations
- // could be reordered with respect to later global/LDS memory
- // operations of the same wave.
+ // If no cross address space ordering then a GDS "S_WAITCNT lgkmcnt(0)"
+ // is not needed as GDS operations for all waves are executed in a total
+ // global ordering as observed by all waves. Required if also
+ // synchronizing with global/LDS memory as GDS operations could be
+ // reordered with respect to later global/LDS memory operations of the
+ // same wave.
LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WORKGROUP:
@@ -859,10 +874,19 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
return Changed;
}
-bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const {
+bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const {
+ return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
+ IsCrossAddrSpaceOrdering, Pos);
+}
+
+bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
if (!InsertCacheInv)
return false;
@@ -873,9 +897,9 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
- const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
- ? AMDGPU::BUFFER_WBINVL1
- : AMDGPU::BUFFER_WBINVL1_VOL;
+ const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
+ ? AMDGPU::BUFFER_WBINVL1
+ : AMDGPU::BUFFER_WBINVL1_VOL;
if (Pos == Position::AFTER)
++MI;
@@ -884,7 +908,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- BuildMI(MBB, MI, DL, TII->get(Flush));
+ BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
Changed = true;
break;
case SIAtomicScope::WORKGROUP:
@@ -902,7 +926,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
/// sequentially consistent, and no other thread can access scratch
/// memory.
- /// Other address spaces do not hava a cache.
+ /// Other address spaces do not have a cache.
if (Pos == Position::AFTER)
--MI;
@@ -930,8 +954,8 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
case SIAtomicScope::WORKGROUP:
// In WGP mode the waves of a work-group can be executing on either CU of
// the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
- // CU mode and all waves of a work-group are on the same CU, and so the
- // L0 does not need to be bypassed.
+ // CU mode all waves of a work-group are on the same CU, and so the L0
+ // does not need to be bypassed.
if (!CuMode) Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WAVEFRONT:
@@ -964,10 +988,10 @@ bool SIGfx10CacheControl::enableNonTemporal(
return Changed;
}
-bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace,
- Position Pos) const {
+bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
if (!InsertCacheInv)
return false;
@@ -1074,12 +1098,12 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
case SIAtomicScope::WORKGROUP:
- // If no cross address space ordering then an LDS waitcnt is not
- // needed as LDS operations for all waves are executed in a
- // total global ordering as observed by all waves. Required if
- // also synchronizing with global/GDS memory as LDS operations
- // could be reordered with respect to later global/GDS memory
- // operations of the same wave.
+ // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
+ // not needed as LDS operations for all waves are executed in a total
+ // global ordering as observed by all waves. Required if also
+ // synchronizing with global/GDS memory as LDS operations could be
+ // reordered with respect to later global/GDS memory operations of the
+ // same wave.
LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WAVEFRONT:
@@ -1096,12 +1120,12 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- // If no cross address space ordering then an GDS waitcnt is not
- // needed as GDS operations for all waves are executed in a
- // total global ordering as observed by all waves. Required if
- // also synchronizing with global/LDS memory as GDS operations
- // could be reordered with respect to later global/LDS memory
- // operations of the same wave.
+ // If no cross address space ordering then a GDS "S_WAITCNT lgkmcnt(0)"
+ // is not needed as GDS operations for all waves are executed in a total
+ // global ordering as observed by all waves. Required if also
+ // synchronizing with global/LDS memory as GDS operations could be
+ // reordered with respect to later global/LDS memory operations of the
+ // same wave.
LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WORKGROUP:
@@ -1177,9 +1201,9 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
SIMemOp::LOAD,
MOI.getIsCrossAddressSpaceOrdering(),
Position::AFTER);
- Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- Position::AFTER);
+ Changed |= CC->insertAcquire(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ Position::AFTER);
}
return Changed;
@@ -1203,11 +1227,10 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
if (MOI.isAtomic()) {
if (MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= CC->insertWait(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- SIMemOp::LOAD | SIMemOp::STORE,
- MOI.getIsCrossAddressSpaceOrdering(),
- Position::BEFORE);
+ Changed |= CC->insertRelease(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
return Changed;
}
@@ -1239,19 +1262,23 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
/// ordering and memory scope, then library does not need to
/// generate a fence. Could add support in this file for
/// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
- /// adding waitcnt before a S_BARRIER.
- Changed |= CC->insertWait(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- SIMemOp::LOAD | SIMemOp::STORE,
- MOI.getIsCrossAddressSpaceOrdering(),
- Position::BEFORE);
+ /// adding S_WAITCNT before an S_BARRIER.
+ Changed |= CC->insertRelease(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
+
+ // TODO: If both release and invalidate are happening they could be combined
+ // to use the single "BUFFER_WBL2" instruction. This could be done by
+ // reorganizing this code or as part of optimizing the SIInsertWaitcnt pass to
+ // track cache invalidate and write back instructions.
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- Position::BEFORE);
+ Changed |= CC->insertAcquire(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ Position::BEFORE);
return Changed;
}
@@ -1270,11 +1297,10 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= CC->insertWait(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- SIMemOp::LOAD | SIMemOp::STORE,
- MOI.getIsCrossAddressSpaceOrdering(),
- Position::BEFORE);
+ Changed |= CC->insertRelease(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
@@ -1287,9 +1313,9 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
SIMemOp::STORE,
MOI.getIsCrossAddressSpaceOrdering(),
Position::AFTER);
- Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
- Position::AFTER);
+ Changed |= CC->insertAcquire(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ Position::AFTER);
}
return Changed;
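At the caller end, a rough standalone sketch of how the fence expansion above now selects between the two hooks based on the atomic ordering. AtomicOrdering here is a local stand-in for llvm::AtomicOrdering, and expandFence() is a hypothetical simplification that only mirrors the control flow of SIMemoryLegalizer::expandAtomicFence, not the pass's real method.

#include <cstdio>

enum class AtomicOrdering { Monotonic, Acquire, Release, AcquireRelease,
                            SequentiallyConsistent };

// Returns true if any synchronization would be emitted for the fence.
bool expandFence(AtomicOrdering Ordering) {
  bool Changed = false;

  // Release-or-stronger: make this thread's prior memory operations visible
  // to other threads, inserted before the fence point.
  if (Ordering == AtomicOrdering::Release ||
      Ordering == AtomicOrdering::AcquireRelease ||
      Ordering == AtomicOrdering::SequentiallyConsistent) {
    std::puts("insertRelease(Scope, AddrSpace, CrossAS, Position::BEFORE)");
    Changed = true;
  }

  // Acquire-or-stronger: make other threads' prior memory operations visible
  // to this thread, again at the fence point.
  if (Ordering == AtomicOrdering::Acquire ||
      Ordering == AtomicOrdering::AcquireRelease ||
      Ordering == AtomicOrdering::SequentiallyConsistent) {
    std::puts("insertAcquire(Scope, AddrSpace, Position::BEFORE)");
    Changed = true;
  }

  return Changed;
}

int main() {
  std::puts("fence seq_cst expands to:");
  expandFence(AtomicOrdering::SequentiallyConsistent);
}
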