[llvm] r356946 - AMDGPU: Add support for cross address space synchronization scopes
Konstantin Zhuravlyov via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 25 13:50:22 PDT 2019
Author: kzhuravl
Date: Mon Mar 25 13:50:21 2019
New Revision: 356946
URL: http://llvm.org/viewvc/llvm-project?rev=356946&view=rev
Log:
AMDGPU: Add support for cross address space synchronization scopes
Differential Revision: https://reviews.llvm.org/D59517
Modified:
llvm/trunk/docs/AMDGPUUsage.rst
llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/trunk/test/CodeGen/AMDGPU/atomicrmw-nand.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
llvm/trunk/test/CodeGen/AMDGPU/local-atomics-fp.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll
Modified: llvm/trunk/docs/AMDGPUUsage.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/AMDGPUUsage.rst?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/docs/AMDGPUUsage.rst (original)
+++ llvm/trunk/docs/AMDGPUUsage.rst Mon Mar 25 13:50:21 2019
@@ -323,62 +323,80 @@ is conservatively correct for OpenCL.
.. table:: AMDHSA LLVM Sync Scopes
:name: amdgpu-amdhsa-llvm-sync-scopes-table
- ================ ==========================================================
- LLVM Sync Scope Description
- ================ ==========================================================
- *none* The default: ``system``.
-
- Synchronizes with, and participates in modification and
- seq_cst total orderings with, other operations (except
- image operations) for all address spaces (except private,
- or generic that accesses private) provided the other
- operation's sync scope is:
-
- - ``system``.
- - ``agent`` and executed by a thread on the same agent.
- - ``workgroup`` and executed by a thread in the same
- workgroup.
- - ``wavefront`` and executed by a thread in the same
- wavefront.
-
- ``agent`` Synchronizes with, and participates in modification and
- seq_cst total orderings with, other operations (except
- image operations) for all address spaces (except private,
- or generic that accesses private) provided the other
- operation's sync scope is:
-
- - ``system`` or ``agent`` and executed by a thread on the
- same agent.
- - ``workgroup`` and executed by a thread in the same
- workgroup.
- - ``wavefront`` and executed by a thread in the same
- wavefront.
-
- ``workgroup`` Synchronizes with, and participates in modification and
- seq_cst total orderings with, other operations (except
- image operations) for all address spaces (except private,
- or generic that accesses private) provided the other
- operation's sync scope is:
-
- - ``system``, ``agent`` or ``workgroup`` and executed by a
- thread in the same workgroup.
- - ``wavefront`` and executed by a thread in the same
- wavefront.
-
- ``wavefront`` Synchronizes with, and participates in modification and
- seq_cst total orderings with, other operations (except
- image operations) for all address spaces (except private,
- or generic that accesses private) provided the other
- operation's sync scope is:
-
- - ``system``, ``agent``, ``workgroup`` or ``wavefront``
- and executed by a thread in the same wavefront.
-
- ``singlethread`` Only synchronizes with, and participates in modification
- and seq_cst total orderings with, other operations (except
- image operations) running in the same thread for all
- address spaces (for example, in signal handlers).
- ================ ==========================================================
+ ======================= ===================================================
+ LLVM Sync Scope Description
+ ======================= ===================================================
+ *none* The default: ``system``.
+
+ Synchronizes with, and participates in modification
+ and seq_cst total orderings with, other operations
+ (except image operations) for all address spaces
+ (except private, or generic that accesses private)
+ provided the other operation's sync scope is:
+
+ - ``system``.
+ - ``agent`` and executed by a thread on the same
+ agent.
+ - ``workgroup`` and executed by a thread in the
+ same workgroup.
+ - ``wavefront`` and executed by a thread in the
+ same wavefront.
+
+ ``agent`` Synchronizes with, and participates in modification
+ and seq_cst total orderings with, other operations
+ (except image operations) for all address spaces
+ (except private, or generic that accesses private)
+ provided the other operation's sync scope is:
+
+ - ``system`` or ``agent`` and executed by a thread
+ on the same agent.
+ - ``workgroup`` and executed by a thread in the
+ same workgroup.
+ - ``wavefront`` and executed by a thread in the
+ same wavefront.
+
+ ``workgroup`` Synchronizes with, and participates in modification
+ and seq_cst total orderings with, other operations
+ (except image operations) for all address spaces
+ (except private, or generic that accesses private)
+ provided the other operation's sync scope is:
+
+ - ``system``, ``agent`` or ``workgroup`` and
+ executed by a thread in the same workgroup.
+ - ``wavefront`` and executed by a thread in the
+ same wavefront.
+
+ ``wavefront`` Synchronizes with, and participates in modification
+ and seq_cst total orderings with, other operations
+ (except image operations) for all address spaces
+ (except private, or generic that accesses private)
+ provided the other operation's sync scope is:
+
+ - ``system``, ``agent``, ``workgroup`` or
+ ``wavefront`` and executed by a thread in the
+ same wavefront.
+
+ ``singlethread`` Only synchronizes with, and participates in
+ modification and seq_cst total orderings with,
+ other operations (except image operations) running
+ in the same thread for all address spaces (for
+ example, in signal handlers).
+
+ ``one-as`` Same as ``system`` but only synchronizes with other
+ operations within the same address space.
+
+ ``agent-one-as`` Same as ``agent`` but only synchronizes with other
+ operations within the same address space.
+
+ ``workgroup-one-as`` Same as ``workgroup`` but only synchronizes with
+ other operations within the same address space.
+
+ ``wavefront-one-as`` Same as ``wavefront`` but only synchronizes with
+ other operations within the same address space.
+
+ ``singlethread-one-as`` Same as ``singlethread`` but only synchronizes with
+ other operations within the same address space.
+ ======================= ===================================================
AMDGPU Intrinsics
-----------------
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp Mon Mar 25 13:50:21 2019
@@ -23,6 +23,16 @@ AMDGPUMachineModuleInfo::AMDGPUMachineMo
AgentSSID = CTX.getOrInsertSyncScopeID("agent");
WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup");
WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront");
+ SystemOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("one-as");
+ AgentOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("agent-one-as");
+ WorkgroupOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("workgroup-one-as");
+ WavefrontOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("wavefront-one-as");
+ SingleThreadOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("singlethread-one-as");
}
} // end namespace llvm
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h Mon Mar 25 13:50:21 2019
@@ -29,12 +29,22 @@ private:
// All supported memory/synchronization scopes can be found here:
// http://llvm.org/docs/AMDGPUUsage.html#memory-scopes
- /// Agent synchronization scope ID.
+ /// Agent synchronization scope ID (cross address space).
SyncScope::ID AgentSSID;
- /// Workgroup synchronization scope ID.
+ /// Workgroup synchronization scope ID (cross address space).
SyncScope::ID WorkgroupSSID;
- /// Wavefront synchronization scope ID.
+ /// Wavefront synchronization scope ID (cross address space).
SyncScope::ID WavefrontSSID;
+ /// System synchronization scope ID (single address space).
+ SyncScope::ID SystemOneAddressSpaceSSID;
+ /// Agent synchronization scope ID (single address space).
+ SyncScope::ID AgentOneAddressSpaceSSID;
+ /// Workgroup synchronization scope ID (single address space).
+ SyncScope::ID WorkgroupOneAddressSpaceSSID;
+ /// Wavefront synchronization scope ID (single address space).
+ SyncScope::ID WavefrontOneAddressSpaceSSID;
+ /// Single thread synchronization scope ID (single address space).
+ SyncScope::ID SingleThreadOneAddressSpaceSSID;
/// In AMDGPU target synchronization scopes are inclusive, meaning a
/// larger synchronization scope is inclusive of a smaller synchronization
@@ -43,35 +53,70 @@ private:
/// \returns \p SSID's inclusion ordering, or "None" if \p SSID is not
/// supported by the AMDGPU target.
Optional<uint8_t> getSyncScopeInclusionOrdering(SyncScope::ID SSID) const {
- if (SSID == SyncScope::SingleThread)
+ if (SSID == SyncScope::SingleThread ||
+ SSID == getSingleThreadOneAddressSpaceSSID())
return 0;
- else if (SSID == getWavefrontSSID())
+ else if (SSID == getWavefrontSSID() ||
+ SSID == getWavefrontOneAddressSpaceSSID())
return 1;
- else if (SSID == getWorkgroupSSID())
+ else if (SSID == getWorkgroupSSID() ||
+ SSID == getWorkgroupOneAddressSpaceSSID())
return 2;
- else if (SSID == getAgentSSID())
+ else if (SSID == getAgentSSID() ||
+ SSID == getAgentOneAddressSpaceSSID())
return 3;
- else if (SSID == SyncScope::System)
+ else if (SSID == SyncScope::System ||
+ SSID == getSystemOneAddressSpaceSSID())
return 4;
return None;
}
+ /// \returns True if \p SSID is restricted to a single address space, false
+ /// otherwise.
+ bool isOneAddressSpace(SyncScope::ID SSID) const {
+ return SSID == getSingleThreadOneAddressSpaceSSID() ||
+ SSID == getWavefrontOneAddressSpaceSSID() ||
+ SSID == getWorkgroupOneAddressSpaceSSID() ||
+ SSID == getAgentOneAddressSpaceSSID() ||
+ SSID == getSystemOneAddressSpaceSSID();
+ }
+
public:
AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI);
- /// \returns Agent synchronization scope ID.
+ /// \returns Agent synchronization scope ID (cross address space).
SyncScope::ID getAgentSSID() const {
return AgentSSID;
}
- /// \returns Workgroup synchronization scope ID.
+ /// \returns Workgroup synchronization scope ID (cross address space).
SyncScope::ID getWorkgroupSSID() const {
return WorkgroupSSID;
}
- /// \returns Wavefront synchronization scope ID.
+ /// \returns Wavefront synchronization scope ID (cross address space).
SyncScope::ID getWavefrontSSID() const {
return WavefrontSSID;
}
+ /// \returns System synchronization scope ID (single address space).
+ SyncScope::ID getSystemOneAddressSpaceSSID() const {
+ return SystemOneAddressSpaceSSID;
+ }
+ /// \returns Agent synchronization scope ID (single address space).
+ SyncScope::ID getAgentOneAddressSpaceSSID() const {
+ return AgentOneAddressSpaceSSID;
+ }
+ /// \returns Workgroup synchronization scope ID (single address space).
+ SyncScope::ID getWorkgroupOneAddressSpaceSSID() const {
+ return WorkgroupOneAddressSpaceSSID;
+ }
+ /// \returns Wavefront synchronization scope ID (single address space).
+ SyncScope::ID getWavefrontOneAddressSpaceSSID() const {
+ return WavefrontOneAddressSpaceSSID;
+ }
+ /// \returns Single thread synchronization scope ID (single address space).
+ SyncScope::ID getSingleThreadOneAddressSpaceSSID() const {
+ return SingleThreadOneAddressSpaceSSID;
+ }
/// In AMDGPU target synchronization scopes are inclusive, meaning a
/// larger synchronization scope is inclusive of a smaller synchronization
@@ -87,7 +132,11 @@ public:
if (!AIO || !BIO)
return None;
- return AIO.getValue() > BIO.getValue();
+ bool IsAOneAddressSpace = isOneAddressSpace(A);
+ bool IsBOneAddressSpace = isOneAddressSpace(B);
+
+ return AIO.getValue() >= BIO.getValue() &&
+ (IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace);
}
};
Modified: llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp Mon Mar 25 13:50:21 2019
@@ -417,35 +417,46 @@ void SIMemOpAccess::reportUnsupported(co
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
SIAtomicAddrSpace InstrScope) const {
- /// TODO: For now assume OpenCL memory model which treats each
- /// address space as having a separate happens-before relation, and
- /// so an instruction only has ordering with respect to the address
- /// space it accesses, and if it accesses multiple address spaces it
- /// does not require ordering of operations in different address
- /// spaces.
- if (SSID == SyncScope::System)
+ if (SSID == SyncScope::System)
+ return std::make_tuple(SIAtomicScope::SYSTEM,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getAgentSSID())
+ return std::make_tuple(SIAtomicScope::AGENT,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getWorkgroupSSID())
+ return std::make_tuple(SIAtomicScope::WORKGROUP,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getWavefrontSSID())
+ return std::make_tuple(SIAtomicScope::WAVEFRONT,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == SyncScope::SingleThread)
+ return std::make_tuple(SIAtomicScope::SINGLETHREAD,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getSystemOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::SYSTEM,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == MMI->getAgentSSID())
+ if (SSID == MMI->getAgentOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::AGENT,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == MMI->getWorkgroupSSID())
+ if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::WORKGROUP,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == MMI->getWavefrontSSID())
+ if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::WAVEFRONT,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == SyncScope::SingleThread)
+ if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::SINGLETHREAD,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- /// TODO: To support HSA Memory Model need to add additional memory
- /// scopes that specify that do require cross address space
- /// ordering.
return None;
}
@@ -721,13 +732,12 @@ bool SIGfx6CacheControl::insertWait(Mach
bool VMCnt = false;
bool LGKMCnt = false;
- bool EXPCnt = false;
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- VMCnt = true;
+ VMCnt |= true;
break;
case SIAtomicScope::WORKGROUP:
case SIAtomicScope::WAVEFRONT:
@@ -751,7 +761,7 @@ bool SIGfx6CacheControl::insertWait(Mach
// also synchronizing with global/GDS memory as LDS operations
// could be reordered with respect to later global/GDS memory
// operations of the same wave.
- LGKMCnt = IsCrossAddrSpaceOrdering;
+ LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
@@ -773,7 +783,7 @@ bool SIGfx6CacheControl::insertWait(Mach
// also synchronizing with global/LDS memory as GDS operations
// could be reordered with respect to later global/LDS memory
// operations of the same wave.
- EXPCnt = IsCrossAddrSpaceOrdering;
+ LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WORKGROUP:
case SIAtomicScope::WAVEFRONT:
@@ -786,11 +796,11 @@ bool SIGfx6CacheControl::insertWait(Mach
}
}
- if (VMCnt || LGKMCnt || EXPCnt) {
+ if (VMCnt || LGKMCnt) {
unsigned WaitCntImmediate =
AMDGPU::encodeWaitcnt(IV,
VMCnt ? 0 : getVmcntBitMask(IV),
- EXPCnt ? 0 : getExpcntBitMask(IV),
+ getExpcntBitMask(IV),
LGKMCnt ? 0 : getLgkmcntBitMask(IV));
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
Changed = true;
Modified: llvm/trunk/test/CodeGen/AMDGPU/atomicrmw-nand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/atomicrmw-nand.ll?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/atomicrmw-nand.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/atomicrmw-nand.ll Mon Mar 25 13:50:21 2019
@@ -12,8 +12,10 @@ define i32 @atomic_nand_i32_lds(i32 addr
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_not_b32_e32 v1, v2
; GCN-NEXT: v_or_b32_e32 v1, -5, v1
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_cmpst_rtn_b32 v1, v0, v2, v1
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: buffer_wbinvl1_vol
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll Mon Mar 25 13:50:21 2019
@@ -27,9 +27,9 @@ bb:
%tmp1 = zext i32 %tmp to i64
%tmp2 = getelementptr inbounds [448 x i32], [448 x i32] addrspace(3)* @0, i32 0, i32 %tmp
%tmp3 = load i32, i32 addrspace(3)* %tmp2, align 4
- fence syncscope("workgroup") release
+ fence syncscope("workgroup-one-as") release
tail call void @llvm.amdgcn.s.barrier()
- fence syncscope("workgroup") acquire
+ fence syncscope("workgroup-one-as") acquire
%tmp4 = add nsw i32 %tmp3, %tmp3
%tmp5 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp4, i32 177, i32 15, i32 15, i1 zeroext false)
%tmp6 = add nsw i32 %tmp5, %tmp4
Modified: llvm/trunk/test/CodeGen/AMDGPU/local-atomics-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/local-atomics-fp.ll?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/local-atomics-fp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/local-atomics-fp.ll Mon Mar 25 13:50:21 2019
@@ -34,7 +34,7 @@ define void @lds_atomic_fadd_noret_f32(f
; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
-; HAS-ATOMICS: s_waitcnt lgkmcnt(1)
+; HAS-ATOMICS: s_waitcnt vmcnt(0) lgkmcnt(0)
; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
define amdgpu_kernel void @lds_ds_fadd(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
%idx.add = add nuw i32 %idx, 4
@@ -48,6 +48,27 @@ define amdgpu_kernel void @lds_ds_fadd(f
store float %a3, float addrspace(1)* %out
ret void
}
+
+; GCN-LABEL: {{^}}lds_ds_fadd_one_as:
+; VI-DAG: s_mov_b32 m0
+; GFX9-NOT: m0
+; HAS-ATOMICS-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000
+; HAS-ATOMICS: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32
+; HAS-ATOMICS: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64
+; HAS-ATOMICS: s_waitcnt lgkmcnt(1)
+; HAS-ATOMICS: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]]
+define amdgpu_kernel void @lds_ds_fadd_one_as(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) {
+ %idx.add = add nuw i32 %idx, 4
+ %shl0 = shl i32 %idx.add, 3
+ %shl1 = shl i32 %idx.add, 4
+ %ptr0 = inttoptr i32 %shl0 to float addrspace(3)*
+ %ptr1 = inttoptr i32 %shl1 to float addrspace(3)*
+ %a1 = atomicrmw fadd float addrspace(3)* %ptr0, float 4.2e+1 syncscope("one-as") seq_cst
+ %a2 = atomicrmw fadd float addrspace(3)* %ptr1, float 4.2e+1 syncscope("one-as") seq_cst
+ %a3 = atomicrmw fadd float addrspace(3)* %ptrf, float %a1 syncscope("one-as") seq_cst
+ store float %a3, float addrspace(1)* %out
+ ret void
+}
; GCN-LABEL: {{^}}lds_atomic_fadd_ret_f64:
; GCN: ds_read_b64
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll Mon Mar 25 13:50:21 2019
@@ -6,7 +6,7 @@
; FUNC-LABEL: {{^}}system_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() {
@@ -18,7 +18,7 @@ entry:
; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_release() {
entry:
@@ -29,7 +29,7 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acq_rel() {
@@ -41,7 +41,7 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_seq_cst() {
@@ -50,6 +50,53 @@ entry:
ret void
}
+; FUNC-LABEL: {{^}}system_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_acquire() {
+entry:
+ fence syncscope("one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_release() {
+entry:
+ fence syncscope("one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_acq_rel() {
+entry:
+ fence syncscope("one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_seq_cst() {
+entry:
+ fence syncscope("one-as") seq_cst
+ ret void
+}
+
; FUNC-LABEL: {{^}}singlethread_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
@@ -90,10 +137,50 @@ entry:
ret void
}
+; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_acquire() {
+entry:
+ fence syncscope("singlethread-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_release() {
+entry:
+ fence syncscope("singlethread-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_acq_rel() {
+entry:
+ fence syncscope("singlethread-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_seq_cst() {
+entry:
+ fence syncscope("singlethread-one-as") seq_cst
+ ret void
+}
+
; FUNC-LABEL: {{^}}agent_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() {
@@ -105,7 +192,7 @@ entry:
; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_release() {
entry:
@@ -116,7 +203,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acq_rel() {
@@ -128,7 +215,7 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_seq_cst() {
@@ -137,9 +224,56 @@ entry:
ret void
}
+; FUNC-LABEL: {{^}}agent_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_acquire() {
+entry:
+ fence syncscope("agent-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_release() {
+entry:
+ fence syncscope("agent-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_acq_rel() {
+entry:
+ fence syncscope("agent-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_seq_cst() {
+entry:
+ fence syncscope("agent-one-as") seq_cst
+ ret void
+}
+
; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() {
@@ -150,7 +284,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() {
@@ -161,7 +295,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() {
@@ -172,7 +306,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() {
@@ -181,6 +315,50 @@ entry:
ret void
}
+; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_acquire() {
+entry:
+ fence syncscope("workgroup-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_release() {
+entry:
+ fence syncscope("workgroup-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_acq_rel() {
+entry:
+ fence syncscope("workgroup-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_seq_cst() {
+entry:
+ fence syncscope("workgroup-one-as") seq_cst
+ ret void
+}
+
; FUNC-LABEL: {{^}}wavefront_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
@@ -220,3 +398,43 @@ entry:
fence syncscope("wavefront") seq_cst
ret void
}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_acquire() {
+entry:
+ fence syncscope("wavefront-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_release() {
+entry:
+ fence syncscope("wavefront-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_acq_rel() {
+entry:
+ fence syncscope("wavefront-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_seq_cst() {
+entry:
+ fence syncscope("wavefront-one-as") seq_cst
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll Mon Mar 25 13:50:21 2019
@@ -2,9 +2,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; GCN-LABEL: {{^}}system_monotonic_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -15,9 +15,9 @@ entry:
}
; GCN-LABEL: {{^}}system_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -28,9 +28,9 @@ entry:
}
; GCN-LABEL: {{^}}system_release_monotonic:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -41,9 +41,9 @@ entry:
}
; GCN-LABEL: {{^}}system_acq_rel_monotonic:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -54,9 +54,9 @@ entry:
}
; GCN-LABEL: {{^}}system_seq_cst_monotonic:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -67,9 +67,9 @@ entry:
}
; GCN-LABEL: {{^}}system_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -80,9 +80,9 @@ entry:
}
; GCN-LABEL: {{^}}system_release_acquire:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -93,9 +93,9 @@ entry:
}
; GCN-LABEL: {{^}}system_acq_rel_acquire:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -106,9 +106,9 @@ entry:
}
; GCN-LABEL: {{^}}system_seq_cst_acquire:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -119,9 +119,9 @@ entry:
}
; GCN-LABEL: {{^}}system_seq_cst_seq_cst:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
@@ -131,11 +131,141 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
+; GCN-LABEL: {{^}}system_one_as_monotonic_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_monotonic_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_acquire_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_release_monotonic:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_release_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acq_rel_monotonic:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_acq_rel_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_seq_cst_monotonic:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_seq_cst_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_acquire_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_release_acquire:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_release_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acq_rel_acquire:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_acq_rel_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_seq_cst_acquire:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_seq_cst_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_seq_cst_seq_cst:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_seq_cst_seq_cst(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -145,9 +275,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -158,9 +288,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_release_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -171,9 +301,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acq_rel_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -184,9 +314,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_seq_cst_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -197,9 +327,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -210,9 +340,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_release_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -223,9 +353,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acq_rel_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -236,9 +366,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_seq_cst_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -249,9 +379,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_seq_cst_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
@@ -261,11 +391,141 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}agent_monotonic_monotonic:
+; GCN-LABEL: {{^}}singlethread_one_as_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_monotonic_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_acquire_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_release_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_release_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_acq_rel_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_acq_rel_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_seq_cst_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_acquire_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_release_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_release_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_acq_rel_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_acq_rel_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_seq_cst_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_seq_cst_seq_cst(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -275,9 +535,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -288,9 +548,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_release_monotonic:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -301,9 +561,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_acq_rel_monotonic:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -314,9 +574,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_seq_cst_monotonic:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -327,9 +587,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -340,9 +600,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_release_acquire:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -353,9 +613,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_acq_rel_acquire:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -366,9 +626,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_seq_cst_acquire:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -379,9 +639,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_seq_cst_seq_cst:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
@@ -391,11 +651,141 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_monotonic_monotonic:
+; GCN-LABEL: {{^}}agent_one_as_monotonic_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_monotonic_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_acquire_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_release_monotonic:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_release_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acq_rel_monotonic:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_acq_rel_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_seq_cst_monotonic:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_seq_cst_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_acquire_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_release_acquire:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_release_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acq_rel_acquire:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_acq_rel_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_seq_cst_acquire:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_seq_cst_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_seq_cst_seq_cst:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_seq_cst_seq_cst(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -405,9 +795,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -418,9 +808,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_release_monotonic:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -431,9 +821,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -444,9 +834,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -457,9 +847,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -470,9 +860,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_release_acquire:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -483,9 +873,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_acq_rel_acquire:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -496,9 +886,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_seq_cst_acquire:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -509,9 +899,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
@@ -521,11 +911,141 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}wavefront_monotonic_monotonic:
+; GCN-LABEL: {{^}}workgroup_one_as_monotonic_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_monotonic_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_acquire_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_release_monotonic:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_release_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_monotonic:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_acq_rel_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_monotonic:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_seq_cst_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_acquire_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_release_acquire:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_release_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_acquire:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_acq_rel_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_acquire:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_seq_cst_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_seq_cst:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_seq_cst_seq_cst(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -535,9 +1055,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -548,9 +1068,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_release_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -561,9 +1081,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acq_rel_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -574,9 +1094,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
@@ -587,9 +1107,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -600,9 +1120,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_release_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -613,9 +1133,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acq_rel_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -626,9 +1146,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
@@ -639,9 +1159,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
@@ -650,3 +1170,133 @@ entry:
%val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
ret void
}
+
+; GCN-LABEL: {{^}}wavefront_one_as_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_monotonic_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_acquire_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_release_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_release_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_acq_rel_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_acq_rel_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_seq_cst_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_acquire_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_release_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_release_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_acq_rel_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_acq_rel_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_seq_cst_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_seq_cst:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_seq_cst_seq_cst(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll Mon Mar 25 13:50:21 2019
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
-; FUNC-LABEL: {{^}}system_acquire:
+; FUNC-LABEL: {{^}}system_one_as_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GFX6: s_waitcnt vmcnt(0){{$}}
@@ -10,6 +10,232 @@
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_acquire() {
+entry:
+ fence syncscope("one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_release() {
+entry:
+ fence syncscope("one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX6: buffer_wbinvl1{{$}}
+; GFX8: buffer_wbinvl1_vol{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_acq_rel() {
+entry:
+ fence syncscope("one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX6: buffer_wbinvl1{{$}}
+; GFX8: buffer_wbinvl1_vol{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_seq_cst() {
+entry:
+ fence syncscope("one-as") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_acquire() {
+entry:
+ fence syncscope("singlethread-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_release() {
+entry:
+ fence syncscope("singlethread-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_acq_rel() {
+entry:
+ fence syncscope("singlethread-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_seq_cst() {
+entry:
+ fence syncscope("singlethread-one-as") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GFX6: s_waitcnt vmcnt(0){{$}}
+; GFX6-NEXT: buffer_wbinvl1{{$}}
+; GFX8: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_acquire() {
+entry:
+ fence syncscope("agent-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_release() {
+entry:
+ fence syncscope("agent-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX6: buffer_wbinvl1{{$}}
+; GFX8: buffer_wbinvl1_vol{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_acq_rel() {
+entry:
+ fence syncscope("agent-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX6: buffer_wbinvl1{{$}}
+; GFX8: buffer_wbinvl1_vol{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_seq_cst() {
+entry:
+ fence syncscope("agent-one-as") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_acquire() {
+entry:
+ fence syncscope("workgroup-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_release:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_release() {
+entry:
+ fence syncscope("workgroup-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_acq_rel() {
+entry:
+ fence syncscope("workgroup-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_seq_cst() {
+entry:
+ fence syncscope("workgroup-one-as") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_acquire() {
+entry:
+ fence syncscope("wavefront-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_release() {
+entry:
+ fence syncscope("wavefront-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_acq_rel() {
+entry:
+ fence syncscope("wavefront-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_seq_cst() {
+entry:
+ fence syncscope("wavefront-one-as") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX6-NEXT: buffer_wbinvl1{{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
+; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() {
entry:
fence acquire
@@ -19,7 +245,7 @@ entry:
; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_release() {
entry:
@@ -30,7 +256,7 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
@@ -43,7 +269,7 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
@@ -96,9 +322,9 @@ entry:
; FUNC-LABEL: {{^}}agent_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GFX6: s_waitcnt vmcnt(0){{$}}
+; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6-NEXT: buffer_wbinvl1{{$}}
-; GFX8: s_waitcnt vmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() {
@@ -110,7 +336,7 @@ entry:
; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_release() {
entry:
@@ -121,7 +347,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
@@ -134,7 +360,7 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
; GCN: s_endpgm
@@ -146,7 +372,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() {
@@ -157,7 +383,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() {
@@ -168,7 +394,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() {
@@ -179,7 +405,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() {
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir Mon Mar 25 13:50:21 2019
@@ -104,7 +104,7 @@ body: |
S_WAITCNT 127
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
S_WAITCNT 3952
- BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load seq_cst 4 from %ir.gep)
+ BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from %ir.gep)
bb.2.exit:
liveins: $sgpr2_sgpr3
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll Mon Mar 25 13:50:21 2019
@@ -1,11 +1,311 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-; GCN-LABEL: {{^}}system_monotonic:
+; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_monotonic(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_acquire(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_release:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_release(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") release
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acq_rel:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_acq_rel(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_seq_cst:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @system_one_as_seq_cst(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_monotonic(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_acquire(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_release:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_release(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") release
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_acq_rel:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_acq_rel(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") acq_rel
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @singlethread_one_as_seq_cst(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("singlethread-one-as") seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_monotonic(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_acquire(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_release:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_release(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") release
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acq_rel:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_acq_rel(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_seq_cst:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+define amdgpu_kernel void @agent_one_as_seq_cst(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_monotonic(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_acquire(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_release:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_release(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") release
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acq_rel:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_acq_rel(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @workgroup_one_as_seq_cst(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_monotonic(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_acquire(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_release:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_release(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") release
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_acq_rel:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_acq_rel(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") acq_rel
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
+define amdgpu_kernel void @wavefront_one_as_seq_cst(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront-one-as") seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_monotonic(
i32* %out, i32 %in) {
entry:
@@ -14,9 +314,9 @@ entry:
}
; GCN-LABEL: {{^}}system_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acquire(
i32* %out, i32 %in) {
@@ -26,9 +326,9 @@ entry:
}
; GCN-LABEL: {{^}}system_release:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_release(
i32* %out, i32 %in) {
@@ -38,9 +338,9 @@ entry:
}
; GCN-LABEL: {{^}}system_acq_rel:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_acq_rel(
i32* %out, i32 %in) {
@@ -50,9 +350,9 @@ entry:
}
; GCN-LABEL: {{^}}system_seq_cst:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @system_seq_cst(
i32* %out, i32 %in) {
@@ -62,9 +362,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_monotonic(
i32* %out, i32 %in) {
@@ -74,9 +374,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acquire(
i32* %out, i32 %in) {
@@ -86,9 +386,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_release:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_release(
i32* %out, i32 %in) {
@@ -98,9 +398,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acq_rel:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_acq_rel(
i32* %out, i32 %in) {
@@ -110,9 +410,9 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @singlethread_seq_cst(
i32* %out, i32 %in) {
@@ -122,9 +422,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_monotonic(
i32* %out, i32 %in) {
@@ -134,9 +434,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acquire(
i32* %out, i32 %in) {
@@ -146,9 +446,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_release:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_release(
i32* %out, i32 %in) {
@@ -158,9 +458,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_acq_rel:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_acq_rel(
i32* %out, i32 %in) {
@@ -170,9 +470,9 @@ entry:
}
; GCN-LABEL: {{^}}agent_seq_cst:
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
define amdgpu_kernel void @agent_seq_cst(
i32* %out, i32 %in) {
@@ -182,9 +482,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_monotonic(
i32* %out, i32 %in) {
@@ -194,9 +494,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acquire(
i32* %out, i32 %in) {
@@ -206,9 +506,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_release:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_release(
i32* %out, i32 %in) {
@@ -218,9 +518,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_acq_rel:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_acq_rel(
i32* %out, i32 %in) {
@@ -230,9 +530,9 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_seq_cst:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @workgroup_seq_cst(
i32* %out, i32 %in) {
@@ -242,9 +542,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_monotonic(
i32* %out, i32 %in) {
@@ -254,9 +554,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acquire(
i32* %out, i32 %in) {
@@ -266,9 +566,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_release:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_release(
i32* %out, i32 %in) {
@@ -278,9 +578,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acq_rel:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_acq_rel(
i32* %out, i32 %in) {
@@ -290,9 +590,9 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: buffer_wbinvl1_vol
define amdgpu_kernel void @wavefront_seq_cst(
i32* %out, i32 %in) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir Mon Mar 25 13:50:21 2019
@@ -11,7 +11,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
- renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load seq_cst 4 from `i32 addrspace(42)* undef`)
+ renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -30,7 +30,7 @@ body: |
$vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(42)* undef`)
+ FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`)
S_ENDPGM 0
...
@@ -47,7 +47,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 killed $sgpr4, implicit $exec, implicit $exec
$vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
- FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`)
+ FLAT_ATOMIC_CMPSWAP killed renamable $vgpr2_vgpr3, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("workgroup-one-as") seq_cst seq_cst 4 on `i32 addrspace(42)* undef`)
S_ENDPGM 0
...
@@ -63,7 +63,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3
$vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec
$vgpr2 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront") seq_cst 4 on `i32 addrspace(42)* undef`)
+ FLAT_ATOMIC_SWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store syncscope("wavefront-one-as") seq_cst 4 on `i32 addrspace(42)* undef`)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll Mon Mar 25 13:50:21 2019
@@ -5,282 +5,282 @@
declare i32 @llvm.amdgcn.workitem.id.x()
-; GCN-LABEL: {{^}}system_unordered:
+; GCN-LABEL: {{^}}system_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @system_unordered(
+define amdgpu_kernel void @system_one_as_unordered(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in unordered, align 4
+ %val = load atomic i32, i32* %in syncscope("one-as") unordered, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}system_monotonic:
+; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @system_monotonic(
+define amdgpu_kernel void @system_one_as_monotonic(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in monotonic, align 4
+ %val = load atomic i32, i32* %in syncscope("one-as") monotonic, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}system_acquire:
+; GCN-LABEL: {{^}}system_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @system_acquire(
+define amdgpu_kernel void @system_one_as_acquire(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in acquire, align 4
+ %val = load atomic i32, i32* %in syncscope("one-as") acquire, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}system_seq_cst:
+; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @system_seq_cst(
+define amdgpu_kernel void @system_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in seq_cst, align 4
+ %val = load atomic i32, i32* %in syncscope("one-as") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}singlethread_unordered:
+; GCN-LABEL: {{^}}singlethread_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @singlethread_unordered(
+define amdgpu_kernel void @singlethread_one_as_unordered(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
+ %val = load atomic i32, i32* %in syncscope("singlethread-one-as") unordered, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}singlethread_monotonic:
+; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @singlethread_monotonic(
+define amdgpu_kernel void @singlethread_one_as_monotonic(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
+ %val = load atomic i32, i32* %in syncscope("singlethread-one-as") monotonic, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}singlethread_acquire:
+; GCN-LABEL: {{^}}singlethread_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @singlethread_acquire(
+define amdgpu_kernel void @singlethread_one_as_acquire(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
+ %val = load atomic i32, i32* %in syncscope("singlethread-one-as") acquire, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}singlethread_seq_cst:
+; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @singlethread_seq_cst(
+define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
+ %val = load atomic i32, i32* %in syncscope("singlethread-one-as") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}agent_unordered:
+; GCN-LABEL: {{^}}agent_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @agent_unordered(
+define amdgpu_kernel void @agent_one_as_unordered(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
+ %val = load atomic i32, i32* %in syncscope("agent-one-as") unordered, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}agent_monotonic:
+; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @agent_monotonic(
+define amdgpu_kernel void @agent_one_as_monotonic(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
+ %val = load atomic i32, i32* %in syncscope("agent-one-as") monotonic, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}agent_acquire:
+; GCN-LABEL: {{^}}agent_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @agent_acquire(
+define amdgpu_kernel void @agent_one_as_acquire(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("agent") acquire, align 4
+ %val = load atomic i32, i32* %in syncscope("agent-one-as") acquire, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}agent_seq_cst:
+; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @agent_seq_cst(
+define amdgpu_kernel void @agent_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
+ %val = load atomic i32, i32* %in syncscope("agent-one-as") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}workgroup_unordered:
+; GCN-LABEL: {{^}}workgroup_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @workgroup_unordered(
+define amdgpu_kernel void @workgroup_one_as_unordered(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
+ %val = load atomic i32, i32* %in syncscope("workgroup-one-as") unordered, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}workgroup_monotonic:
+; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @workgroup_monotonic(
+define amdgpu_kernel void @workgroup_one_as_monotonic(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
+ %val = load atomic i32, i32* %in syncscope("workgroup-one-as") monotonic, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}workgroup_acquire:
+; GCN-LABEL: {{^}}workgroup_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @workgroup_acquire(
+define amdgpu_kernel void @workgroup_one_as_acquire(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
+ %val = load atomic i32, i32* %in syncscope("workgroup-one-as") acquire, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}workgroup_seq_cst:
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @workgroup_seq_cst(
+define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
+ %val = load atomic i32, i32* %in syncscope("workgroup-one-as") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}wavefront_unordered:
+; GCN-LABEL: {{^}}wavefront_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @wavefront_unordered(
+define amdgpu_kernel void @wavefront_one_as_unordered(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
+ %val = load atomic i32, i32* %in syncscope("wavefront-one-as") unordered, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}wavefront_monotonic:
+; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @wavefront_monotonic(
+define amdgpu_kernel void @wavefront_one_as_monotonic(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
+ %val = load atomic i32, i32* %in syncscope("wavefront-one-as") monotonic, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}wavefront_acquire:
+; GCN-LABEL: {{^}}wavefront_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @wavefront_acquire(
+define amdgpu_kernel void @wavefront_one_as_acquire(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
+ %val = load atomic i32, i32* %in syncscope("wavefront-one-as") acquire, align 4
store i32 %val, i32* %out
ret void
}
-; GCN-LABEL: {{^}}wavefront_seq_cst:
+; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GFX89-NOT: buffer_wbinvl1_vol
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
-define amdgpu_kernel void @wavefront_seq_cst(
+define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
- %val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
+ %val = load atomic i32, i32* %in syncscope("wavefront-one-as") seq_cst, align 4
store i32 %val, i32* %out
ret void
}
@@ -373,5 +373,285 @@ entry:
store i32 %val, i32* %out
ret void
}
+
+; GCN-LABEL: {{^}}system_unordered:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @system_unordered(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in unordered, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @system_monotonic(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in monotonic, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NEXT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @system_acquire(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in acquire, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_seq_cst:
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NEXT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @system_seq_cst(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in seq_cst, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_unordered:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @singlethread_unordered(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("singlethread") unordered, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @singlethread_monotonic(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("singlethread") monotonic, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @singlethread_acquire(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("singlethread") acquire, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_seq_cst:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @singlethread_seq_cst(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("singlethread") seq_cst, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_unordered:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @agent_unordered(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("agent") unordered, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @agent_monotonic(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("agent") monotonic, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NEXT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @agent_acquire(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("agent") acquire, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_seq_cst:
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NEXT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @agent_seq_cst(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("agent") seq_cst, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_unordered:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @workgroup_unordered(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("workgroup") unordered, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @workgroup_monotonic(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("workgroup") monotonic, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @workgroup_acquire(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("workgroup") acquire, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_seq_cst:
+; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @workgroup_seq_cst(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("workgroup") seq_cst, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_unordered:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @wavefront_unordered(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("wavefront") unordered, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @wavefront_monotonic(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("wavefront") monotonic, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @wavefront_acquire(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("wavefront") acquire, align 4
+ store i32 %val, i32* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_seq_cst:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @wavefront_seq_cst(
+ i32* %in, i32* %out) {
+entry:
+ %val = load atomic i32, i32* %in syncscope("wavefront") seq_cst, align 4
+ store i32 %val, i32* %out
+ ret void
+}
!0 = !{i32 1}
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir Mon Mar 25 13:50:21 2019
@@ -17,7 +17,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") unordered 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -41,7 +41,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") monotonic 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -65,7 +65,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") acquire 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -89,7 +89,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") seq_cst 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -113,7 +113,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") unordered 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -137,7 +137,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") monotonic 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -161,7 +161,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") acquire 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -185,7 +185,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") seq_cst 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -209,7 +209,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") unordered 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -233,7 +233,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") monotonic 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -257,7 +257,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") acquire 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -281,7 +281,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") seq_cst 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -305,7 +305,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") unordered 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -329,7 +329,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") monotonic 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -353,7 +353,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") acquire 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -377,7 +377,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent") seq_cst 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -401,7 +401,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load unordered 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -425,7 +425,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load monotonic 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -449,7 +449,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load acquire 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -473,7 +473,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load seq_cst 4 from `i32 addrspace(3)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(3)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -498,7 +498,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -520,7 +520,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -542,7 +542,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -564,7 +564,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -586,7 +586,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -608,7 +608,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -630,7 +630,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -652,7 +652,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -674,7 +674,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -696,7 +696,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -718,7 +718,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -740,7 +740,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -762,7 +762,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -784,7 +784,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -806,7 +806,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -828,7 +828,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -850,7 +850,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store unordered 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -872,7 +872,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store monotonic 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -894,7 +894,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -916,7 +916,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(3)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -938,7 +938,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(3)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -960,7 +960,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(3)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -982,7 +982,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(3)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -1004,7 +1004,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(3)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -1026,7 +1026,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(3)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
@@ -1048,7 +1048,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(3)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 0, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(3)* undef`)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll Mon Mar 25 13:50:21 2019
@@ -3,10 +3,228 @@
; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
-; FUNC-LABEL: {{^}}system_acquire:
+; FUNC-LABEL: {{^}}system_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_acquire() {
+entry:
+ fence syncscope("one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_release() {
+entry:
+ fence syncscope("one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_acq_rel() {
+entry:
+ fence syncscope("one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_one_as_seq_cst() {
+entry:
+ fence syncscope("one-as") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_acquire() {
+entry:
+ fence syncscope("singlethread-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_release() {
+entry:
+ fence syncscope("singlethread-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_acq_rel() {
+entry:
+ fence syncscope("singlethread-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_one_as_seq_cst() {
+entry:
+ fence syncscope("singlethread-one-as") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_acquire() {
+entry:
+ fence syncscope("agent-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_release() {
+entry:
+ fence syncscope("agent-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_acq_rel() {
+entry:
+ fence syncscope("agent-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_one_as_seq_cst() {
+entry:
+ fence syncscope("agent-one-as") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_acquire() {
+entry:
+ fence syncscope("workgroup-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_release() {
+entry:
+ fence syncscope("workgroup-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_acq_rel() {
+entry:
+ fence syncscope("workgroup-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_one_as_seq_cst() {
+entry:
+ fence syncscope("workgroup-one-as") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_acquire() {
+entry:
+ fence syncscope("wavefront-one-as") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_release() {
+entry:
+ fence syncscope("wavefront-one-as") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_acq_rel() {
+entry:
+ fence syncscope("wavefront-one-as") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_one_as_seq_cst() {
+entry:
+ fence syncscope("wavefront-one-as") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() {
@@ -18,7 +236,7 @@ entry:
; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_release() {
entry:
@@ -29,7 +247,7 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_acq_rel() {
@@ -41,7 +259,7 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @system_seq_cst() {
@@ -93,7 +311,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acquire:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() {
@@ -105,7 +323,7 @@ entry:
; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_release() {
entry:
@@ -116,7 +334,7 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_acq_rel() {
@@ -128,7 +346,7 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @agent_seq_cst() {
@@ -139,7 +357,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() {
@@ -150,7 +368,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() {
@@ -161,7 +379,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() {
@@ -172,7 +390,7 @@ entry:
; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() {
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir Mon Mar 25 13:50:21 2019
@@ -55,7 +55,7 @@ body: |
S_WAITCNT 127
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
- $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
S_WAITCNT 3952
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir Mon Mar 25 13:50:21 2019
@@ -17,7 +17,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") unordered 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -41,7 +41,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") monotonic 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -65,7 +65,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") acquire 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -89,7 +89,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread") seq_cst 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -113,7 +113,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") unordered 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -137,7 +137,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") monotonic 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -161,7 +161,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") acquire 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -185,7 +185,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront") seq_cst 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -209,7 +209,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") unordered 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -233,7 +233,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") monotonic 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -257,7 +257,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") acquire 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -281,7 +281,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup") seq_cst 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -305,7 +305,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") unordered 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -329,7 +329,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") monotonic 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -353,7 +353,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") acquire 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -377,7 +377,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent") seq_cst 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -401,7 +401,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load unordered 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -425,7 +425,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load monotonic 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -449,7 +449,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load acquire 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -473,7 +473,7 @@ body: |
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4)
$m0 = S_MOV_B32 -1
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
- renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load seq_cst 4 from `i32 addrspace(2)* undef`)
+ renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(2)* undef`)
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1
$vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`)
@@ -498,7 +498,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -520,7 +520,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -542,7 +542,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -564,7 +564,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -586,7 +586,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") unordered 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -608,7 +608,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") monotonic 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -630,7 +630,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") release 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -652,7 +652,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront") seq_cst 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("wavefront-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -674,7 +674,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") unordered 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -696,7 +696,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") monotonic 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -718,7 +718,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") release 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -740,7 +740,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup") seq_cst 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("workgroup-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -762,7 +762,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") unordered 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
---
@@ -783,7 +783,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") monotonic 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -805,7 +805,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") release 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -827,7 +827,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent") seq_cst 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -893,7 +893,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store release 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -915,7 +915,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store seq_cst 4 into `i32 addrspace(2)* undef`)
+ DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -937,7 +937,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") unordered 4 into `i32 addrspace(2)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") unordered 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -959,7 +959,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") monotonic 4 into `i32 addrspace(2)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") monotonic 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -981,7 +981,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acquire 4 into `i32 addrspace(2)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acquire 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -1003,7 +1003,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") release 4 into `i32 addrspace(2)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") release 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -1025,7 +1025,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") acq_rel 4 into `i32 addrspace(2)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") acq_rel 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
@@ -1047,7 +1047,7 @@ body: |
$m0 = S_MOV_B32 -1
$vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec
$vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
- $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread") seq_cst 4 into `i32 addrspace(2)* undef`)
+ $vgpr2 = DS_WRXCHG_RTN_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("singlethread-one-as") seq_cst 4 into `i32 addrspace(2)* undef`)
S_ENDPGM 0
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll?rev=356946&r1=356945&r2=356946&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll Mon Mar 25 13:50:21 2019
@@ -5,203 +5,203 @@
declare i32 @llvm.amdgcn.workitem.id.x()
-; GCN-LABEL: {{^}}system_unordered:
+; GCN-LABEL: {{^}}system_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @system_unordered(
+define amdgpu_kernel void @system_one_as_unordered(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("one-as") unordered, align 4
ret void
}
-; GCN-LABEL: {{^}}system_monotonic:
+; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @system_monotonic(
+define amdgpu_kernel void @system_one_as_monotonic(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("one-as") monotonic, align 4
ret void
}
-; GCN-LABEL: {{^}}system_release:
+; GCN-LABEL: {{^}}system_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @system_release(
+define amdgpu_kernel void @system_one_as_release(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out release, align 4
+ store atomic i32 %in, i32* %out syncscope("one-as") release, align 4
ret void
}
-; GCN-LABEL: {{^}}system_seq_cst:
+; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @system_seq_cst(
+define amdgpu_kernel void @system_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("one-as") seq_cst, align 4
ret void
}
-; GCN-LABEL: {{^}}singlethread_unordered:
+; GCN-LABEL: {{^}}singlethread_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @singlethread_unordered(
+define amdgpu_kernel void @singlethread_one_as_unordered(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread-one-as") unordered, align 4
ret void
}
-; GCN-LABEL: {{^}}singlethread_monotonic:
+; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @singlethread_monotonic(
+define amdgpu_kernel void @singlethread_one_as_monotonic(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread-one-as") monotonic, align 4
ret void
}
-; GCN-LABEL: {{^}}singlethread_release:
+; GCN-LABEL: {{^}}singlethread_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @singlethread_release(
+define amdgpu_kernel void @singlethread_one_as_release(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread-one-as") release, align 4
ret void
}
-; GCN-LABEL: {{^}}singlethread_seq_cst:
+; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @singlethread_seq_cst(
+define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("singlethread-one-as") seq_cst, align 4
ret void
}
-; GCN-LABEL: {{^}}agent_unordered:
+; GCN-LABEL: {{^}}agent_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @agent_unordered(
+define amdgpu_kernel void @agent_one_as_unordered(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("agent-one-as") unordered, align 4
ret void
}
-; GCN-LABEL: {{^}}agent_monotonic:
+; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @agent_monotonic(
+define amdgpu_kernel void @agent_one_as_monotonic(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("agent-one-as") monotonic, align 4
ret void
}
-; GCN-LABEL: {{^}}agent_release:
+; GCN-LABEL: {{^}}agent_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @agent_release(
+define amdgpu_kernel void @agent_one_as_release(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("agent") release, align 4
+ store atomic i32 %in, i32* %out syncscope("agent-one-as") release, align 4
ret void
}
-; GCN-LABEL: {{^}}agent_seq_cst:
+; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @agent_seq_cst(
+define amdgpu_kernel void @agent_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("agent-one-as") seq_cst, align 4
ret void
}
-; GCN-LABEL: {{^}}workgroup_unordered:
+; GCN-LABEL: {{^}}workgroup_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @workgroup_unordered(
+define amdgpu_kernel void @workgroup_one_as_unordered(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup-one-as") unordered, align 4
ret void
}
-; GCN-LABEL: {{^}}workgroup_monotonic:
+; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @workgroup_monotonic(
+define amdgpu_kernel void @workgroup_one_as_monotonic(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup-one-as") monotonic, align 4
ret void
}
-; GCN-LABEL: {{^}}workgroup_release:
+; GCN-LABEL: {{^}}workgroup_one_as_release:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @workgroup_release(
+define amdgpu_kernel void @workgroup_one_as_release(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup-one-as") release, align 4
ret void
}
-; GCN-LABEL: {{^}}workgroup_seq_cst:
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @workgroup_seq_cst(
+define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("workgroup-one-as") seq_cst, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_unordered:
+; GCN-LABEL: {{^}}wavefront_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @wavefront_unordered(
+define amdgpu_kernel void @wavefront_one_as_unordered(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront-one-as") unordered, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_monotonic:
+; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @wavefront_monotonic(
+define amdgpu_kernel void @wavefront_one_as_monotonic(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront-one-as") monotonic, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_release:
+; GCN-LABEL: {{^}}wavefront_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @wavefront_release(
+define amdgpu_kernel void @wavefront_one_as_release(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront-one-as") release, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_seq_cst:
+; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
-define amdgpu_kernel void @wavefront_seq_cst(
+define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
- store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
+ store atomic i32 %in, i32* %out syncscope("wavefront-one-as") seq_cst, align 4
ret void
}
@@ -295,4 +295,204 @@ entry:
ret void
}
+; GCN-LABEL: {{^}}system_unordered:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @system_unordered(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out unordered, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @system_monotonic(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out monotonic, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_release:
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @system_release(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out release, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_seq_cst:
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @system_seq_cst(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out seq_cst, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_unordered:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @singlethread_unordered(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("singlethread") unordered, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @singlethread_monotonic(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("singlethread") monotonic, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_release:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @singlethread_release(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("singlethread") release, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_seq_cst:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @singlethread_seq_cst(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("singlethread") seq_cst, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_unordered:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @agent_unordered(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("agent") unordered, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @agent_monotonic(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("agent") monotonic, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_release:
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @agent_release(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("agent") release, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_seq_cst:
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @agent_seq_cst(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("agent") seq_cst, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_unordered:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @workgroup_unordered(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("workgroup") unordered, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @workgroup_monotonic(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("workgroup") monotonic, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_release:
+; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @workgroup_release(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("workgroup") release, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_seq_cst:
+; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @workgroup_seq_cst(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("workgroup") seq_cst, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_unordered:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @wavefront_unordered(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("wavefront") unordered, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @wavefront_monotonic(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("wavefront") monotonic, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_release:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @wavefront_release(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("wavefront") release, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_seq_cst:
+; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @wavefront_seq_cst(
+ i32 %in, i32* %out) {
+entry:
+ store atomic i32 %in, i32* %out syncscope("wavefront") seq_cst, align 4
+ ret void
+}
+
!0 = !{i32 1}
More information about the llvm-commits mailing list