[llvm] r334241 - [AMDGPU] Simplify memory legalizer
Tony Tye via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 7 15:28:32 PDT 2018
Author: t-tye
Date: Thu Jun 7 15:28:32 2018
New Revision: 334241
URL: http://llvm.org/viewvc/llvm-project?rev=334241&view=rev
Log:
[AMDGPU] Simplify memory legalizer
- Make code easier to maintain.
- Avoid generating waitcnts for VMEM if the address space does not involve VMEM.
- Add support to generate waitcnts for LDS and GDS memory.
Differential Revision: https://reviews.llvm.org/D47504
Added:
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
Modified: llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp?rev=334241&r1=334240&r2=334241&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp Thu Jun 7 15:28:32 2018
@@ -21,6 +21,7 @@
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -37,6 +38,7 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <list>
@@ -48,42 +50,142 @@ using namespace llvm::AMDGPU;
namespace {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
+/// Memory operation flags. Can be ORed together.
+enum class SIMemOp {
+ NONE = 0u,
+ LOAD = 1u << 0,
+ STORE = 1u << 1,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
+};
+
+/// Position to insert a new instruction relative to an existing
+/// instruction.
+enum class Position {
+ BEFORE,
+ AFTER
+};
+
+/// The atomic synchronization scopes supported by the AMDGPU target.
+enum class SIAtomicScope {
+ NONE,
+ SINGLETHREAD,
+ WAVEFRONT,
+ WORKGROUP,
+ AGENT,
+ SYSTEM
+};
+
+/// The distinct address spaces supported by the AMDGPU target for
+/// atomic memory operations. Can be ORed together.
+enum class SIAtomicAddrSpace {
+ NONE = 0u,
+ GLOBAL = 1u << 0,
+ LDS = 1u << 1,
+ SCRATCH = 1u << 2,
+ GDS = 1u << 3,
+ OTHER = 1u << 4,
+
+ /// The address spaces that can be accessed by a FLAT instruction.
+ FLAT = GLOBAL | LDS | SCRATCH,
+
+ /// The address spaces that support atomic instructions.
+ ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
+
+ /// All address spaces.
+ ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
+
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
+};
+
+/// Sets named bit \p BitName to "true" if present in instruction \p MI.
+/// \returns Returns true if \p MI is modified, false otherwise.
+template <uint16_t BitName>
+bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
+ int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
+ if (BitIdx == -1)
+ return false;
+
+ MachineOperand &Bit = MI->getOperand(BitIdx);
+ if (Bit.getImm() != 0)
+ return false;
+
+ Bit.setImm(1);
+ return true;
+}
+
class SIMemOpInfo final {
private:
- SyncScope::ID SSID = SyncScope::System;
+
+ friend class SIMemOpAccess;
+
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
+ SIAtomicScope Scope = SIAtomicScope::SYSTEM;
+ SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
+ SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
+ bool IsCrossAddressSpaceOrdering = false;
bool IsNonTemporal = false;
- SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering)
- : SSID(SSID), Ordering(Ordering) {}
-
- SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering,
- AtomicOrdering FailureOrdering, bool IsNonTemporal = false)
- : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering),
- IsNonTemporal(IsNonTemporal) {}
-
- /// \returns Info constructed from \p MI, which has at least machine memory
- /// operand.
- static Optional<SIMemOpInfo> constructFromMIWithMMO(
- const MachineBasicBlock::iterator &MI);
+ SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
+ SIAtomicScope Scope = SIAtomicScope::SYSTEM,
+ SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
+ SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
+ bool IsCrossAddressSpaceOrdering = true,
+ AtomicOrdering FailureOrdering =
+ AtomicOrdering::SequentiallyConsistent,
+ bool IsNonTemporal = false)
+ : Ordering(Ordering), FailureOrdering(FailureOrdering),
+ Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
+ InstrAddrSpace(InstrAddrSpace),
+ IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
+ IsNonTemporal(IsNonTemporal) {
+ // There is also no cross address space ordering if the ordering
+ // address space is the same as the instruction address space and
+ // only contains a single address space.
+ if ((OrderingAddrSpace == InstrAddrSpace) &&
+ isPowerOf2_32(uint32_t(InstrAddrSpace)))
+ IsCrossAddressSpaceOrdering = false;
+ }
public:
- /// \returns Synchronization scope ID of the machine instruction used to
+ /// \returns Atomic synchronization scope of the machine instruction used to
/// create this SIMemOpInfo.
- SyncScope::ID getSSID() const {
- return SSID;
+ SIAtomicScope getScope() const {
+ return Scope;
}
+
/// \returns Ordering constraint of the machine instruction used to
/// create this SIMemOpInfo.
AtomicOrdering getOrdering() const {
return Ordering;
}
+
/// \returns Failure ordering constraint of the machine instruction used to
/// create this SIMemOpInfo.
AtomicOrdering getFailureOrdering() const {
return FailureOrdering;
}
+
+ /// \returns The address spaces accessed by the machine
+ /// instruction used to create this SIMemOpInfo.
+ SIAtomicAddrSpace getInstrAddrSpace() const {
+ return InstrAddrSpace;
+ }
+
+ /// \returns The address spaces that must be ordered by the machine
+ /// instruction used to create this SIMemOpInfo.
+ SIAtomicAddrSpace getOrderingAddrSpace() const {
+ return OrderingAddrSpace;
+ }
+
+ /// \returns Return true iff memory ordering of operations on
+ /// different address spaces is required.
+ bool getIsCrossAddressSpaceOrdering() const {
+ return IsCrossAddressSpaceOrdering;
+ }
+
/// \returns True if memory access of the machine instruction used to
/// create this SIMemOpInfo is non-temporal, false otherwise.
bool isNonTemporal() const {
@@ -96,59 +198,111 @@ public:
return Ordering != AtomicOrdering::NotAtomic;
}
+};
+
+class SIMemOpAccess final {
+private:
+
+ AMDGPUAS SIAddrSpaceInfo;
+ AMDGPUMachineModuleInfo *MMI = nullptr;
+
+ /// Reports unsupported message \p Msg for \p MI to LLVM context.
+ void reportUnsupported(const MachineBasicBlock::iterator &MI,
+ const char *Msg) const;
+
+ /// Inspects the target synchronization scope \p SSID and determines
+ /// the SI atomic scope it corresponds to, the address spaces it
+ /// covers, and whether the memory ordering applies between address
+ /// spaces.
+ Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
+ toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const;
+
+ /// \return Return a bit set of the address spaces accessed by \p AS.
+ SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
+
+ /// \returns Info constructed from \p MI, which has at least machine memory
+ /// operand.
+ Optional<SIMemOpInfo> constructFromMIWithMMO(
+ const MachineBasicBlock::iterator &MI) const;
+
+public:
+ /// Construct class to support accessing the machine memory operands
+ /// of instructions in the machine function \p MF.
+ SIMemOpAccess(MachineFunction &MF);
+
/// \returns Load info if \p MI is a load operation, "None" otherwise.
- static Optional<SIMemOpInfo> getLoadInfo(
- const MachineBasicBlock::iterator &MI);
+ Optional<SIMemOpInfo> getLoadInfo(
+ const MachineBasicBlock::iterator &MI) const;
+
/// \returns Store info if \p MI is a store operation, "None" otherwise.
- static Optional<SIMemOpInfo> getStoreInfo(
- const MachineBasicBlock::iterator &MI);
+ Optional<SIMemOpInfo> getStoreInfo(
+ const MachineBasicBlock::iterator &MI) const;
+
/// \returns Atomic fence info if \p MI is an atomic fence operation,
/// "None" otherwise.
- static Optional<SIMemOpInfo> getAtomicFenceInfo(
- const MachineBasicBlock::iterator &MI);
+ Optional<SIMemOpInfo> getAtomicFenceInfo(
+ const MachineBasicBlock::iterator &MI) const;
+
/// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
/// rmw operation, "None" otherwise.
- static Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
- const MachineBasicBlock::iterator &MI);
-
- /// Reports unknown synchronization scope used in \p MI to LLVM
- /// context.
- static void reportUnknownSyncScope(
- const MachineBasicBlock::iterator &MI);
+ Optional<SIMemOpInfo> getAtomicCmpxchgOrRmwInfo(
+ const MachineBasicBlock::iterator &MI) const;
};
-class SIMemoryLegalizer final : public MachineFunctionPass {
-private:
- /// Machine module info.
- const AMDGPUMachineModuleInfo *MMI = nullptr;
+class SICacheControl {
+protected:
/// Instruction info.
const SIInstrInfo *TII = nullptr;
- /// Immediate for "vmcnt(0)".
- unsigned Vmcnt0Immediate = 0;
+ IsaInfo::IsaVersion IV;
- /// Opcode for cache invalidation instruction (L1).
- unsigned VmemSIMDCacheInvalidateOpc = 0;
+ SICacheControl(const SISubtarget &ST);
- /// List of atomic pseudo instructions.
- std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
+public:
- /// Sets named bit (BitName) to "true" if present in \p MI. Returns
- /// true if \p MI is modified, false otherwise.
- template <uint16_t BitName>
- bool enableNamedBit(const MachineBasicBlock::iterator &MI) const {
- int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
- if (BitIdx == -1)
- return false;
-
- MachineOperand &Bit = MI->getOperand(BitIdx);
- if (Bit.getImm() != 0)
- return false;
+ /// Create a cache control for the subtarget \p ST.
+ static std::unique_ptr<SICacheControl> create(const SISubtarget &ST);
- Bit.setImm(1);
- return true;
- }
+ /// Update \p MI memory load instruction to bypass any caches up to
+ /// the \p Scope memory scope for address spaces \p
+ /// AddrSpace. Return true iff the instruction was modified.
+ virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const = 0;
+
+ /// Update \p MI memory instruction to indicate it is
+ /// nontemporal. Return true iff the instruction was modified.
+ virtual bool enableNonTemporal(const MachineBasicBlock::iterator &MI)
+ const = 0;
+
+ /// Inserts any necessary instructions at position \p Pos relative
+ /// to instruction \p MI to ensure any caches associated with
+ /// address spaces \p AddrSpace for memory scopes up to memory scope
+ /// \p Scope are invalidated. Returns true iff any instructions
+ /// inserted.
+ virtual bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const = 0;
+
+ /// Inserts any necessary instructions at position \p Pos relative
+ /// to instruction \p MI to ensure memory instructions of kind \p Op
+ /// associated with address spaces \p AddrSpace have completed as
+ /// observed by other memory instructions executing in memory scope
+ /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory
+ /// ordering is between address spaces. Returns true iff any
+ /// instructions inserted.
+ virtual bool insertWait(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ SIMemOp Op,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const = 0;
+};
+
+class SIGfx6CacheControl : public SICacheControl {
+protected:
/// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
/// is modified, false otherwise.
@@ -162,14 +316,55 @@ private:
return enableNamedBit<AMDGPU::OpName::slc>(MI);
}
- /// Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
- /// Always returns true.
- bool insertVmemSIMDCacheInvalidate(MachineBasicBlock::iterator &MI,
- bool Before = true) const;
- /// Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
- /// Always returns true.
- bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
- bool Before = true) const;
+public:
+
+ SIGfx6CacheControl(const SISubtarget &ST) : SICacheControl(ST) {};
+
+ bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const override;
+
+ bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
+
+ bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const override;
+
+ bool insertWait(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ SIMemOp Op,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const override;
+};
+
+class SIGfx7CacheControl : public SIGfx6CacheControl {
+public:
+
+ SIGfx7CacheControl(const SISubtarget &ST) : SIGfx6CacheControl(ST) {};
+
+ bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const override;
+
+};
+
+class SIMemoryLegalizer final : public MachineFunctionPass {
+private:
+
+ /// Cache Control.
+ std::unique_ptr<SICacheControl> CC = nullptr;
+
+ /// List of atomic pseudo instructions.
+ std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
+
+ /// Return true iff instruction \p MI is an atomic instruction that
+ /// returns a result.
+ bool isAtomicRet(const MachineInstr &MI) const {
+ return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
+ }
/// Removes all processed atomic pseudo instructions from the current
/// function. Returns true if current function is modified, false otherwise.
@@ -211,48 +406,129 @@ public:
} // end namespace anonymous
-/* static */
-Optional<SIMemOpInfo> SIMemOpInfo::constructFromMIWithMMO(
- const MachineBasicBlock::iterator &MI) {
- assert(MI->getNumMemOperands() > 0);
+void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
+ const char *Msg) const {
+ const Function &Func = MI->getParent()->getParent()->getFunction();
+ DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
+ Func.getContext().diagnose(Diag);
+}
+
+Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
+SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
+ SIAtomicAddrSpace InstrScope) const {
+ /// TODO: For now assume OpenCL memory model which treats each
+ /// address space as having a separate happens-before relation, and
+ /// so an instruction only has ordering with respect to the address
+ /// space it accesses, and if it accesses multiple address spaces it
+ /// does not require ordering of operations in different address
+ /// spaces.
+ if (SSID == SyncScope::System)
+ return std::make_tuple(SIAtomicScope::SYSTEM,
+ SIAtomicAddrSpace::ATOMIC & InstrScope,
+ false);
+ if (SSID == MMI->getAgentSSID())
+ return std::make_tuple(SIAtomicScope::AGENT,
+ SIAtomicAddrSpace::ATOMIC & InstrScope,
+ false);
+ if (SSID == MMI->getWorkgroupSSID())
+ return std::make_tuple(SIAtomicScope::WORKGROUP,
+ SIAtomicAddrSpace::ATOMIC & InstrScope,
+ false);
+ if (SSID == MMI->getWavefrontSSID())
+ return std::make_tuple(SIAtomicScope::WAVEFRONT,
+ SIAtomicAddrSpace::ATOMIC & InstrScope,
+ false);
+ if (SSID == SyncScope::SingleThread)
+ return std::make_tuple(SIAtomicScope::SINGLETHREAD,
+ SIAtomicAddrSpace::ATOMIC & InstrScope,
+ false);
+ /// TODO: To support the HSA Memory Model we need to add additional
+ /// memory scopes that do require cross address space
+ /// ordering.
+ return None;
+}
+
+SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
+ if (AS == SIAddrSpaceInfo.FLAT_ADDRESS)
+ return SIAtomicAddrSpace::FLAT;
+ if (AS == SIAddrSpaceInfo.GLOBAL_ADDRESS)
+ return SIAtomicAddrSpace::GLOBAL;
+ if (AS == SIAddrSpaceInfo.LOCAL_ADDRESS)
+ return SIAtomicAddrSpace::LDS;
+ if (AS == SIAddrSpaceInfo.PRIVATE_ADDRESS)
+ return SIAtomicAddrSpace::SCRATCH;
+ if (AS == SIAddrSpaceInfo.REGION_ADDRESS)
+ return SIAtomicAddrSpace::GDS;
+
+ return SIAtomicAddrSpace::OTHER;
+}
+
+SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
+ SIAddrSpaceInfo = getAMDGPUAS(MF.getTarget());
+ MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
+}
- const MachineFunction *MF = MI->getParent()->getParent();
- const AMDGPUMachineModuleInfo *MMI =
- &MF->getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
+Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
+ const MachineBasicBlock::iterator &MI) const {
+ assert(MI->getNumMemOperands() > 0);
SyncScope::ID SSID = SyncScope::SingleThread;
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
+ SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
bool IsNonTemporal = true;
// Validator should check whether or not MMOs cover the entire set of
// locations accessed by the memory instruction.
for (const auto &MMO : MI->memoperands()) {
- const auto &IsSyncScopeInclusion =
- MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
- if (!IsSyncScopeInclusion) {
- reportUnknownSyncScope(MI);
- return None;
- }
-
- SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
- Ordering =
- isStrongerThan(Ordering, MMO->getOrdering()) ?
- Ordering : MMO->getOrdering();
- FailureOrdering =
- isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
- FailureOrdering : MMO->getFailureOrdering();
+ IsNonTemporal &= MMO->isNonTemporal();
+ InstrAddrSpace |=
+ toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
+ AtomicOrdering OpOrdering = MMO->getOrdering();
+ if (OpOrdering != AtomicOrdering::NotAtomic) {
+ const auto &IsSyncScopeInclusion =
+ MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
+ if (!IsSyncScopeInclusion) {
+ reportUnsupported(MI,
+ "Unsupported non-inclusive atomic synchronization scope");
+ return None;
+ }
- if (!(MMO->getFlags() & MachineMemOperand::MONonTemporal))
- IsNonTemporal = false;
+ SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
+ Ordering =
+ isStrongerThan(Ordering, OpOrdering) ?
+ Ordering : MMO->getOrdering();
+ assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
+ MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
+ FailureOrdering =
+ isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
+ FailureOrdering : MMO->getFailureOrdering();
+ }
}
- return SIMemOpInfo(SSID, Ordering, FailureOrdering, IsNonTemporal);
+ SIAtomicScope Scope = SIAtomicScope::NONE;
+ SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
+ bool IsCrossAddressSpaceOrdering = false;
+ if (Ordering != AtomicOrdering::NotAtomic) {
+ auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
+ if (!ScopeOrNone) {
+ reportUnsupported(MI, "Unsupported atomic synchronization scope");
+ return None;
+ }
+ std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
+ ScopeOrNone.getValue();
+ if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
+ ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
+ reportUnsupported(MI, "Unsupported atomic address space");
+ return None;
+ }
+ }
+ return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
+ IsCrossAddressSpaceOrdering, FailureOrdering, IsNonTemporal);
}
-/* static */
-Optional<SIMemOpInfo> SIMemOpInfo::getLoadInfo(
- const MachineBasicBlock::iterator &MI) {
+Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo(
+ const MachineBasicBlock::iterator &MI) const {
assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
if (!(MI->mayLoad() && !MI->mayStore()))
@@ -260,15 +536,13 @@ Optional<SIMemOpInfo> SIMemOpInfo::getLo
// Be conservative if there are no memory operands.
if (MI->getNumMemOperands() == 0)
- return SIMemOpInfo(SyncScope::System,
- AtomicOrdering::SequentiallyConsistent);
+ return SIMemOpInfo();
- return SIMemOpInfo::constructFromMIWithMMO(MI);
+ return constructFromMIWithMMO(MI);
}
-/* static */
-Optional<SIMemOpInfo> SIMemOpInfo::getStoreInfo(
- const MachineBasicBlock::iterator &MI) {
+Optional<SIMemOpInfo> SIMemOpAccess::getStoreInfo(
+ const MachineBasicBlock::iterator &MI) const {
assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
if (!(!MI->mayLoad() && MI->mayStore()))
@@ -276,30 +550,46 @@ Optional<SIMemOpInfo> SIMemOpInfo::getSt
// Be conservative if there are no memory operands.
if (MI->getNumMemOperands() == 0)
- return SIMemOpInfo(SyncScope::System,
- AtomicOrdering::SequentiallyConsistent);
+ return SIMemOpInfo();
- return SIMemOpInfo::constructFromMIWithMMO(MI);
+ return constructFromMIWithMMO(MI);
}
-/* static */
-Optional<SIMemOpInfo> SIMemOpInfo::getAtomicFenceInfo(
- const MachineBasicBlock::iterator &MI) {
+Optional<SIMemOpInfo> SIMemOpAccess::getAtomicFenceInfo(
+ const MachineBasicBlock::iterator &MI) const {
assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
return None;
- SyncScope::ID SSID =
- static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
AtomicOrdering Ordering =
- static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
- return SIMemOpInfo(SSID, Ordering);
+ static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
+
+ SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
+ auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
+ if (!ScopeOrNone) {
+ reportUnsupported(MI, "Unsupported atomic synchronization scope");
+ return None;
+ }
+
+ SIAtomicScope Scope = SIAtomicScope::NONE;
+ SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
+ bool IsCrossAddressSpaceOrdering = false;
+ std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
+ ScopeOrNone.getValue();
+
+ if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
+ ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
+ reportUnsupported(MI, "Unsupported atomic address space");
+ return None;
+ }
+
+ return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
+ IsCrossAddressSpaceOrdering);
}
-/* static */
-Optional<SIMemOpInfo> SIMemOpInfo::getAtomicCmpxchgOrRmwInfo(
- const MachineBasicBlock::iterator &MI) {
+Optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
+ const MachineBasicBlock::iterator &MI) const {
assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
if (!(MI->mayLoad() && MI->mayStore()))
@@ -307,52 +597,251 @@ Optional<SIMemOpInfo> SIMemOpInfo::getAt
// Be conservative if there are no memory operands.
if (MI->getNumMemOperands() == 0)
- return SIMemOpInfo(SyncScope::System,
- AtomicOrdering::SequentiallyConsistent,
- AtomicOrdering::SequentiallyConsistent);
+ return SIMemOpInfo();
- return SIMemOpInfo::constructFromMIWithMMO(MI);
+ return constructFromMIWithMMO(MI);
+}
+
+SICacheControl::SICacheControl(const SISubtarget &ST) {
+ TII = ST.getInstrInfo();
+ IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
}
/* static */
-void SIMemOpInfo::reportUnknownSyncScope(
- const MachineBasicBlock::iterator &MI) {
- DiagnosticInfoUnsupported Diag(MI->getParent()->getParent()->getFunction(),
- "Unsupported synchronization scope");
- LLVMContext *CTX = &MI->getParent()->getParent()->getFunction().getContext();
- CTX->diagnose(Diag);
+std::unique_ptr<SICacheControl> SICacheControl::create(const SISubtarget &ST) {
+ AMDGPUSubtarget::Generation Generation = ST.getGeneration();
+ if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return make_unique<SIGfx6CacheControl>(ST);
+ return make_unique<SIGfx7CacheControl>(ST);
+}
+
+bool SIGfx6CacheControl::enableLoadCacheBypass(
+ const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const {
+ assert(MI->mayLoad() && !MI->mayStore());
+ bool Changed = false;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ /// TODO: Do not set glc for rmw atomic operations as they
+ /// implicitly bypass the L1 cache.
+
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ Changed |= enableGLCBit(MI);
+ break;
+ case SIAtomicScope::WORKGROUP:
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No cache to bypass.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory caches
+ /// to be bypassed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ return Changed;
+}
+
+bool SIGfx6CacheControl::enableNonTemporal(
+ const MachineBasicBlock::iterator &MI) const {
+ assert(MI->mayLoad() ^ MI->mayStore());
+ bool Changed = false;
+
+ /// TODO: Do not enableGLCBit if rmw atomic.
+ Changed |= enableGLCBit(MI);
+ Changed |= enableSLCBit(MI);
+
+ return Changed;
}
-bool SIMemoryLegalizer::insertVmemSIMDCacheInvalidate(
- MachineBasicBlock::iterator &MI, bool Before) const {
+bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
+ bool Changed = false;
+
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
- if (!Before)
+ if (Pos == Position::AFTER)
++MI;
- BuildMI(MBB, MI, DL, TII->get(VmemSIMDCacheInvalidateOpc));
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
+ Changed = true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No cache to invalidate.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory cache
+ /// to be flushed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
- if (!Before)
+ /// Other address spaces do not have a cache.
+
+ if (Pos == Position::AFTER)
--MI;
- return true;
+ return Changed;
}
-bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
- bool Before) const {
+bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ SIMemOp Op,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const {
+ bool Changed = false;
+
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
- if (!Before)
+ if (Pos == Position::AFTER)
++MI;
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);
+ bool VMCnt = false;
+ bool LGKMCnt = false;
+ bool EXPCnt = false;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ VMCnt = true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // The L1 cache keeps all memory operations in order for
+ // wavefronts in the same work-group.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
- if (!Before)
+ if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ case SIAtomicScope::WORKGROUP:
+ // If no cross address space ordering then an LDS waitcnt is not
+ // needed as LDS operations for all waves are executed in a
+ // total global ordering as observed by all waves. Required if
+ // also synchronizing with global/GDS memory as LDS operations
+ // could be reordered with respect to later global/GDS memory
+ // operations of the same wave.
+ LGKMCnt = IsCrossAddrSpaceOrdering;
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // The LDS keeps all memory operations in order for
+ // the same wavefront.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ // If no cross address space ordering then a GDS waitcnt is not
+ // needed as GDS operations for all waves are executed in a
+ // total global ordering as observed by all waves. Required if
+ // also synchronizing with global/LDS memory as GDS operations
+ // could be reordered with respect to later global/LDS memory
+ // operations of the same wave.
+ EXPCnt = IsCrossAddrSpaceOrdering;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // The GDS keeps all memory operations in order for
+ // the same work-group.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ if (VMCnt || LGKMCnt || EXPCnt) {
+ unsigned WaitCntImmediate =
+ AMDGPU::encodeWaitcnt(IV,
+ VMCnt ? 0 : getVmcntBitMask(IV),
+ EXPCnt ? 0 : getExpcntBitMask(IV),
+ LGKMCnt ? 0 : getLgkmcntBitMask(IV));
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
+ Changed = true;
+ }
+
+ if (Pos == Position::AFTER)
--MI;
- return true;
+ return Changed;
+}
+
+bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
+ bool Changed = false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Pos == Position::AFTER)
+ ++MI;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL));
+ Changed = true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No cache to invalidate.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory cache
+ /// to be flushed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ if (Pos == Position::AFTER)
+ --MI;
+
+ return Changed;
}
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
@@ -373,38 +862,38 @@ bool SIMemoryLegalizer::expandLoad(const
bool Changed = false;
if (MOI.isAtomic()) {
- if (MOI.getSSID() == SyncScope::System ||
- MOI.getSSID() == MMI->getAgentSSID()) {
- if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
- MOI.getOrdering() == AtomicOrdering::Acquire ||
- MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= enableGLCBit(MI);
-
- if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= insertWaitcntVmcnt0(MI);
-
- if (MOI.getOrdering() == AtomicOrdering::Acquire ||
- MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
- Changed |= insertWaitcntVmcnt0(MI, false);
- Changed |= insertVmemSIMDCacheInvalidate(MI, false);
- }
-
- return Changed;
+ if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
+ MOI.getOrdering() == AtomicOrdering::Acquire ||
+ MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
+ Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace());
}
- if (MOI.getSSID() == SyncScope::SingleThread ||
- MOI.getSSID() == MMI->getWorkgroupSSID() ||
- MOI.getSSID() == MMI->getWavefrontSSID()) {
- return Changed;
+ if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
+ Changed |= CC->insertWait(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ SIMemOp::LOAD | SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
+
+ if (MOI.getOrdering() == AtomicOrdering::Acquire ||
+ MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
+ Changed |= CC->insertWait(MI, MOI.getScope(),
+ MOI.getInstrAddrSpace(),
+ SIMemOp::LOAD,
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::AFTER);
+ Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ Position::AFTER);
}
- llvm_unreachable("Unsupported synchronization scope");
+ return Changed;
}
// Atomic instructions do not have the nontemporal attribute.
if (MOI.isNonTemporal()) {
- Changed |= enableGLCBit(MI);
- Changed |= enableSLCBit(MI);
+ Changed |= CC->enableNonTemporal(MI);
return Changed;
}
@@ -418,28 +907,20 @@ bool SIMemoryLegalizer::expandStore(cons
bool Changed = false;
if (MOI.isAtomic()) {
- if (MOI.getSSID() == SyncScope::System ||
- MOI.getSSID() == MMI->getAgentSSID()) {
- if (MOI.getOrdering() == AtomicOrdering::Release ||
- MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= insertWaitcntVmcnt0(MI);
+ if (MOI.getOrdering() == AtomicOrdering::Release ||
+ MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
+ Changed |= CC->insertWait(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ SIMemOp::LOAD | SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
- return Changed;
- }
-
- if (MOI.getSSID() == SyncScope::SingleThread ||
- MOI.getSSID() == MMI->getWorkgroupSSID() ||
- MOI.getSSID() == MMI->getWavefrontSSID()) {
- return Changed;
- }
-
- llvm_unreachable("Unsupported synchronization scope");
+ return Changed;
}
// Atomic instructions do not have the nontemporal attribute.
if (MOI.isNonTemporal()) {
- Changed |= enableGLCBit(MI);
- Changed |= enableSLCBit(MI);
+ Changed |= CC->enableNonTemporal(MI);
return Changed;
}
@@ -450,34 +931,35 @@ bool SIMemoryLegalizer::expandAtomicFenc
MachineBasicBlock::iterator &MI) {
assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
+ AtomicPseudoMIs.push_back(MI);
bool Changed = false;
if (MOI.isAtomic()) {
- if (MOI.getSSID() == SyncScope::System ||
- MOI.getSSID() == MMI->getAgentSSID()) {
- if (MOI.getOrdering() == AtomicOrdering::Acquire ||
- MOI.getOrdering() == AtomicOrdering::Release ||
- MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
- MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= insertWaitcntVmcnt0(MI);
-
- if (MOI.getOrdering() == AtomicOrdering::Acquire ||
- MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
- MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= insertVmemSIMDCacheInvalidate(MI);
-
- AtomicPseudoMIs.push_back(MI);
- return Changed;
- }
-
- if (MOI.getSSID() == SyncScope::SingleThread ||
- MOI.getSSID() == MMI->getWorkgroupSSID() ||
- MOI.getSSID() == MMI->getWavefrontSSID()) {
- AtomicPseudoMIs.push_back(MI);
- return Changed;
- }
+ if (MOI.getOrdering() == AtomicOrdering::Acquire ||
+ MOI.getOrdering() == AtomicOrdering::Release ||
+ MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
+ MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
+ /// TODO: This relies on a barrier always generating a waitcnt
+ /// for LDS to ensure it is not reordered with the completion of
+ /// the preceding LDS operations. If barrier had a memory
+ /// ordering and memory scope, then the library does not need to
+ /// generate a fence. Could add support in this file for
+ /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
+ /// adding waitcnt before a S_BARRIER.
+ Changed |= CC->insertWait(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ SIMemOp::LOAD | SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
+
+ if (MOI.getOrdering() == AtomicOrdering::Acquire ||
+ MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
+ MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
+ Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ Position::BEFORE);
- SIMemOpInfo::reportUnknownSyncScope(MI);
+ return Changed;
}
return Changed;
@@ -490,34 +972,33 @@ bool SIMemoryLegalizer::expandAtomicCmpx
bool Changed = false;
if (MOI.isAtomic()) {
- if (MOI.getSSID() == SyncScope::System ||
- MOI.getSSID() == MMI->getAgentSSID()) {
- if (MOI.getOrdering() == AtomicOrdering::Release ||
- MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
- MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
- MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= insertWaitcntVmcnt0(MI);
-
- if (MOI.getOrdering() == AtomicOrdering::Acquire ||
- MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
- MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
- MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
- MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
- Changed |= insertWaitcntVmcnt0(MI, false);
- Changed |= insertVmemSIMDCacheInvalidate(MI, false);
- }
-
- return Changed;
+ if (MOI.getOrdering() == AtomicOrdering::Release ||
+ MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
+ MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
+ MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
+ Changed |= CC->insertWait(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ SIMemOp::LOAD | SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
+
+ if (MOI.getOrdering() == AtomicOrdering::Acquire ||
+ MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
+ MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
+ MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
+ MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
+ Changed |= CC->insertWait(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ isAtomicRet(*MI) ? SIMemOp::LOAD :
+ SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::AFTER);
+ Changed |= CC->insertCacheInvalidate(MI, MOI.getScope(),
+ MOI.getOrderingAddrSpace(),
+ Position::AFTER);
}
- if (MOI.getSSID() == SyncScope::SingleThread ||
- MOI.getSSID() == MMI->getWorkgroupSSID() ||
- MOI.getSSID() == MMI->getWavefrontSSID()) {
- Changed |= enableGLCBit(MI);
- return Changed;
- }
-
- llvm_unreachable("Unsupported synchronization scope");
+ return Changed;
}
return Changed;
@@ -525,30 +1006,22 @@ bool SIMemoryLegalizer::expandAtomicCmpx
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
-
- MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
- TII = ST.getInstrInfo();
- Vmcnt0Immediate =
- AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
- VmemSIMDCacheInvalidateOpc =
- ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
- AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;
+ SIMemOpAccess MOA(MF);
+ CC = SICacheControl::create(MF.getSubtarget<SISubtarget>());
for (auto &MBB : MF) {
for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
continue;
- if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI))
+ if (const auto &MOI = MOA.getLoadInfo(MI))
Changed |= expandLoad(MOI.getValue(), MI);
- else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI))
+ else if (const auto &MOI = MOA.getStoreInfo(MI))
Changed |= expandStore(MOI.getValue(), MI);
- else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI))
+ else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
Changed |= expandAtomicFence(MOI.getValue(), MI);
- else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgOrRmwInfo(MI))
+ else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
Changed |= expandAtomicCmpxchgOrRmw(MOI.getValue(), MI);
}
}
Added: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir?rev=334241&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir Thu Jun 7 15:28:32 2018
@@ -0,0 +1,69 @@
+# RUN: not llc -march=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - 2>&1 | FileCheck -check-prefix=GCN %s
+
+---
+
+# GCN: error: <unknown>:0:0: in function invalid_load void (): Unsupported atomic address space
+
+name: invalid_load
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
+ renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load seq_cst 4 from `i32 addrspace(42)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN: error: <unknown>:0:0: in function invalid_store void (): Unsupported atomic address space
+
+name: invalid_store
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(42)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN: error: <unknown>:0:0: in function invalid_cmpxchg void (): Unsupported atomic address space
+
+name: invalid_cmpxchg
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ $vgpr3 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit-def $vgpr2_vgpr3, implicit $sgpr0_sgpr1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN: error: <unknown>:0:0: in function invalid_rmw void (): Unsupported atomic address space
+
+name: invalid_rmw
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
+ $vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront") seq_cst 4 on `i32 addrspace(42)* undef`)
+ S_ENDPGM
+
+...
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll?rev=334241&r1=334240&r2=334241&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll Thu Jun 7 15:28:32 2018
@@ -1,14 +1,14 @@
; RUN: not llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s
-; CHECK: error: <unknown>:0:0: in function invalid_fence void (): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_fence void (): Unsupported atomic synchronization scope
define amdgpu_kernel void @invalid_fence() {
entry:
fence syncscope("invalid") seq_cst
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_load void (i32*, i32*): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_load void (i32*, i32*): Unsupported non-inclusive atomic synchronization scope
define amdgpu_kernel void @invalid_load(
i32* %in, i32* %out) {
entry:
@@ -17,7 +17,7 @@ entry:
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_store void (i32, i32*): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_store void (i32, i32*): Unsupported non-inclusive atomic synchronization scope
define amdgpu_kernel void @invalid_store(
i32 %in, i32* %out) {
entry:
@@ -25,7 +25,7 @@ entry:
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_cmpxchg void (i32*, i32, i32): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_cmpxchg void (i32*, i32, i32): Unsupported non-inclusive atomic synchronization scope
define amdgpu_kernel void @invalid_cmpxchg(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -34,7 +34,7 @@ entry:
ret void
}
-; CHECK: error: <unknown>:0:0: in function invalid_rmw void (i32*, i32): Unsupported synchronization scope
+; CHECK: error: <unknown>:0:0: in function invalid_rmw void (i32*, i32): Unsupported non-inclusive atomic synchronization scope
define amdgpu_kernel void @invalid_rmw(
i32* %out, i32 %in) {
entry:
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll?rev=334241&r1=334240&r2=334241&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll Thu Jun 7 15:28:32 2018
@@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_unordered(
i32* %in, i32* %out) {
@@ -23,7 +23,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @system_monotonic(
i32* %in, i32* %out) {
@@ -65,7 +65,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_unordered(
i32* %in, i32* %out) {
@@ -79,7 +79,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_monotonic(
i32* %in, i32* %out) {
@@ -93,7 +93,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_acquire(
i32* %in, i32* %out) {
@@ -107,7 +107,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @singlethread_seq_cst(
i32* %in, i32* %out) {
@@ -121,7 +121,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_unordered(
i32* %in, i32* %out) {
@@ -135,7 +135,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @agent_monotonic(
i32* %in, i32* %out) {
@@ -175,9 +175,9 @@ entry:
; GCN-LABEL: {{^}}workgroup_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_unordered(
i32* %in, i32* %out) {
@@ -191,7 +191,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @workgroup_monotonic(
i32* %in, i32* %out) {
@@ -233,7 +233,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_unordered(
i32* %in, i32* %out) {
@@ -247,7 +247,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_monotonic(
i32* %in, i32* %out) {
@@ -261,7 +261,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_acquire(
i32* %in, i32* %out) {
@@ -275,7 +275,7 @@ entry:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @wavefront_seq_cst(
i32* %in, i32* %out) {
Added: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir?rev=334241&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir Thu Jun 7 15:28:32 2018
@@ -0,0 +1,1054 @@
+# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+
+# GCN-LABEL: name: load_singlethread_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_singlethread_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") unordered 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_singlethread_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_singlethread_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") monotonic 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_singlethread_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_singlethread_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") acquire 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_singlethread_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_singlethread_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") seq_cst 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_wavefront_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_wavefront_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") unordered 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_wavefront_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_wavefront_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") monotonic 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_wavefront_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_wavefront_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") acquire 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_wavefront_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_wavefront_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") seq_cst 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_workgroup_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_workgroup_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") unordered 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_workgroup_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_workgroup_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") monotonic 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_workgroup_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_workgroup_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") acquire 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_workgroup_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_workgroup_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") seq_cst 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_agent_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_agent_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") unordered 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_agent_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_agent_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") monotonic 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_agent_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_agent_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") acquire 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_agent_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_agent_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") seq_cst 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_system_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_system_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load unordered 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_system_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_system_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load monotonic 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_system_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_system_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load acquire 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_system_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_system_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load seq_cst 4 from `i32 addrspace(3)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_singlethread_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_singlethread_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_singlethread_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_singlethread_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_singlethread_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_singlethread_release
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_singlethread_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_singlethread_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_wavefront_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_wavefront_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_wavefront_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_wavefront_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_wavefront_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_wavefront_release
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_wavefront_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_wavefront_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_workgroup_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_workgroup_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_workgroup_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_workgroup_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_workgroup_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_workgroup_release
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_workgroup_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_workgroup_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_agent_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_agent_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_agent_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_agent_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_agent_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_agent_release
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_agent_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_agent_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_system_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_system_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store unordered 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_system_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_system_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store monotonic 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_system_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_system_release
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_system_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_system_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_release
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_acq_rel
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_acq_rel
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`)
+ S_ENDPGM
+
+...
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir?rev=334241&r1=334240&r2=334241&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir Thu Jun 7 15:28:32 2018
@@ -1,119 +1,19 @@
-# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - | FileCheck -check-prefix=GCN %s
---- |
- ; ModuleID = 'memory-legalizer-multiple-mem-operands.ll'
- source_filename = "memory-legalizer-multiple-mem-operands.ll"
- target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
-
- define amdgpu_kernel void @multiple_mem_operands(i32 addrspace(1)* %out, i32 %cond, i32 %if_offset, i32 %else_offset) #0 {
- entry:
- %scratch0 = alloca [8192 x i32], addrspace(5)
- %scratch1 = alloca [8192 x i32], addrspace(5)
- %scratchptr01 = bitcast [8192 x i32] addrspace(5)* %scratch0 to i32 addrspace(5)*
- store i32 1, i32 addrspace(5)* %scratchptr01
- %scratchptr12 = bitcast [8192 x i32] addrspace(5)* %scratch1 to i32 addrspace(5)*
- store i32 2, i32 addrspace(5)* %scratchptr12
- %cmp = icmp eq i32 %cond, 0
- br i1 %cmp, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0
-
- if: ; preds = %entry
- %if_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch0, i32 0, i32 %if_offset, !amdgpu.uniform !0
- %if_value = load atomic i32, i32 addrspace(5)* %if_ptr syncscope("workgroup") seq_cst, align 4
- br label %done, !structurizecfg.uniform !0
-
- else: ; preds = %entry
- %else_ptr = getelementptr [8192 x i32], [8192 x i32] addrspace(5)* %scratch1, i32 0, i32 %else_offset, !amdgpu.uniform !0
- %else_value = load atomic i32, i32 addrspace(5)* %else_ptr syncscope("agent") unordered, align 4
- br label %done, !structurizecfg.uniform !0
-
- done: ; preds = %else, %if
- %value = phi i32 [ %if_value, %if ], [ %else_value, %else ]
- store i32 %value, i32 addrspace(1)* %out
- ret void
- }
-
- ; Function Attrs: convergent nounwind
- declare { i1, i64 } @llvm.amdgcn.if(i1) #1
-
- ; Function Attrs: convergent nounwind
- declare { i1, i64 } @llvm.amdgcn.else(i64) #1
-
- ; Function Attrs: convergent nounwind readnone
- declare i64 @llvm.amdgcn.break(i64) #2
-
- ; Function Attrs: convergent nounwind readnone
- declare i64 @llvm.amdgcn.if.break(i1, i64) #2
-
- ; Function Attrs: convergent nounwind readnone
- declare i64 @llvm.amdgcn.else.break(i64, i64) #2
-
- ; Function Attrs: convergent nounwind
- declare i1 @llvm.amdgcn.loop(i64) #1
-
- ; Function Attrs: convergent nounwind
- declare void @llvm.amdgcn.end.cf(i64) #1
-
- attributes #0 = { "target-cpu"="gfx803" }
- attributes #1 = { convergent nounwind }
- attributes #2 = { convergent nounwind readnone }
-
- !0 = !{}
-
-...
---
-# CHECK-LABEL: name: multiple_mem_operands
+# GCN-LABEL: name: multiple_mem_operands
-# CHECK-LABEL: bb.3.done:
-# CHECK: S_WAITCNT 3952
-# CHECK-NEXT: BUFFER_LOAD_DWORD_OFFEN
-# CHECK-NEXT: S_WAITCNT 3952
-# CHECK-NEXT: BUFFER_WBINVL1_VOL
+# GCN-LABEL: bb.3:
+# GCN: S_WAITCNT 3952
+# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
+# GCN-NEXT: S_WAITCNT 3952
+# GCN-NEXT: BUFFER_WBINVL1_VOL
name: multiple_mem_operands
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-registers:
-liveins:
- - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
- - { reg: '$sgpr3', virtual-reg: '' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 65540
- offsetAdjustment: 0
- maxAlignment: 4
- adjustsStack: false
- hasCalls: false
- stackProtector: ''
- maxCallFrameSize: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- savePoint: ''
- restorePoint: ''
-fixedStack:
- - { id: 0, type: default, offset: 0, size: 4, alignment: 4, stack-id: 0,
- isImmutable: false, isAliased: false, callee-saved-register: '' }
-stack:
- - { id: 0, name: scratch0, type: default, offset: 4, size: 32768, alignment: 4,
- stack-id: 0, callee-saved-register: '', local-offset: 0,
- debug-info-variable: '', debug-info-expression: '',
- debug-info-location: '' }
- - { id: 1, name: scratch1, type: default, offset: 32772, size: 32768,
- alignment: 4, stack-id: 0, callee-saved-register: '', local-offset: 32768,
- debug-info-variable: '', debug-info-expression: '',
- debug-info-location: '' }
-constants:
body: |
bb.0.entry:
- successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000)
+ successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $sgpr0_sgpr1, $sgpr3
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
@@ -123,43 +23,43 @@ body: |
$sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
+ BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
S_WAITCNT 127
S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 2, implicit $exec
$vgpr1 = V_MOV_B32_e32 32772, implicit $exec
- BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
- S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc
+ BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
+ S_CBRANCH_SCC0 %bb.1, implicit killed $scc
- bb.2.else:
- successors: %bb.3.done(0x80000000)
+ bb.2:
+ successors: %bb.3(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 32772, implicit $exec
- S_BRANCH %bb.3.done
+ S_BRANCH %bb.3
- bb.1.if:
- successors: %bb.3.done(0x80000000)
+ bb.1:
+ successors: %bb.3(0x80000000)
liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
$sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
S_WAITCNT 3855
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
- bb.3.done:
+ bb.3:
liveins: $sgpr3, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $sgpr0
S_WAITCNT 127
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent") unordered 4 from %ir.else_ptr), (load syncscope("workgroup") seq_cst 4 from %ir.if_ptr)
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
S_WAITCNT 3952
- FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out)
+ FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`)
S_ENDPGM
...
Added: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir?rev=334241&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir Thu Jun 7 15:28:32 2018
@@ -0,0 +1,1054 @@
+# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+
+# GCN-LABEL: name: load_singlethread_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_singlethread_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") unordered 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_singlethread_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_singlethread_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") monotonic 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_singlethread_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_singlethread_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") acquire 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_singlethread_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_singlethread_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") seq_cst 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_wavefront_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_wavefront_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") unordered 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_wavefront_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_wavefront_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") monotonic 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_wavefront_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_wavefront_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") acquire 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_wavefront_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_wavefront_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") seq_cst 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_workgroup_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_workgroup_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") unordered 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_workgroup_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_workgroup_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") monotonic 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_workgroup_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_workgroup_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") acquire 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_workgroup_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_workgroup_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") seq_cst 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_agent_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_agent_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") unordered 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_agent_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_agent_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") monotonic 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_agent_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_agent_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") acquire 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_agent_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_agent_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") seq_cst 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_system_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_system_unordered
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load unordered 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_system_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_system_monotonic
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load monotonic 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_system_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_system_acquire
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load acquire 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: load_system_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_READ_B32
+# GCN-NOT: S_WAITCNT
+# GCN: FLAT_STORE_DWORD
+
+name: load_system_seq_cst
+body: |
+ bb.0:
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+ $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
+ $m0 = S_MOV_B32 -1
+ $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load seq_cst 4 from `i32 addrspace(4)* undef`)
+ $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
+ $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
+ S_ENDPGM
+
+...
+---
+
+# DS (LDS) stores at syncscope("singlethread"): the SI memory legalizer must
+# insert no S_WAITCNT for any ordering (unordered, monotonic, release,
+# seq_cst). The GCN-NOT lines check no wait appears before or after the
+# DS_WRITE_B32.
+# GCN-LABEL: name: store_singlethread_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_singlethread_unordered
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_singlethread_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_singlethread_monotonic
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_singlethread_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_singlethread_release
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_singlethread_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_singlethread_seq_cst
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# DS (LDS) stores at syncscope("wavefront"): no S_WAITCNT expected for any
+# ordering, since a wavefront's own LDS accesses need no cache/sync
+# operations.
+# GCN-LABEL: name: store_wavefront_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_wavefront_unordered
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_wavefront_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_wavefront_monotonic
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_wavefront_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_wavefront_release
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_wavefront_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_wavefront_seq_cst
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# DS (LDS) stores at syncscope("workgroup"): the checks expect no S_WAITCNT
+# for any ordering — LDS is already coherent within a workgroup, so no wait
+# is required for these DS writes.
+# GCN-LABEL: name: store_workgroup_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_workgroup_unordered
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_workgroup_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_workgroup_monotonic
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_workgroup_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_workgroup_release
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_workgroup_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_workgroup_seq_cst
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# DS (LDS) stores at syncscope("agent"): even at agent scope the checks
+# expect no S_WAITCNT around the DS_WRITE_B32 for any ordering, since the
+# access does not involve VMEM (see the commit log: avoid VMEM waitcnts when
+# the address space does not involve VMEM).
+# GCN-LABEL: name: store_agent_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_agent_unordered
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_agent_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_agent_monotonic
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_agent_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_agent_release
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_agent_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_agent_seq_cst
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# DS (LDS) stores at system scope (no explicit syncscope in the memory
+# operand): again no S_WAITCNT is expected for any ordering, as the DS
+# access does not involve VMEM.
+# GCN-LABEL: name: store_system_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_system_unordered
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store unordered 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_system_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_system_monotonic
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store monotonic 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_system_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_system_release
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: store_system_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRITE_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: store_system_seq_cst
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# DS read-modify-write (DS_WRXCHG_RTN_B32, modeled here with a volatile
+# store memory operand) at syncscope("singlethread"): no S_WAITCNT is
+# expected for any ordering (unordered, monotonic, acquire, release,
+# acq_rel, seq_cst).
+# GCN-LABEL: name: atomicrmw_singlethread_unordered
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_unordered
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_monotonic
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_monotonic
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_acquire
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_acquire
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_release
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_release
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_acq_rel
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_acq_rel
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
+---
+
+# GCN-LABEL: name: atomicrmw_singlethread_seq_cst
+
+# GCN-LABEL: bb.0:
+# GCN-NOT: S_WAITCNT
+# GCN: DS_WRXCHG_RTN_B32
+# GCN-NOT: S_WAITCNT
+# GCN: S_ENDPGM
+
+name: atomicrmw_singlethread_seq_cst
+body: |
+  bb.0:
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4)
+    $m0 = S_MOV_B32 -1
+    $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
+    $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(4)* undef`)
+    S_ENDPGM
+
+...
More information about the llvm-commits
mailing list