[llvm] r360087 - [AMDGPU] gfx1010 memory legalizer
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon May 6 14:57:02 PDT 2019
Author: rampitec
Date: Mon May 6 14:57:02 2019
New Revision: 360087
URL: http://llvm.org/viewvc/llvm-project?rev=360087&view=rev
Log:
[AMDGPU] gfx1010 memory legalizer
Differential Revision: https://reviews.llvm.org/D61535
Modified:
llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll
llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp?rev=360087&r1=360086&r2=360087&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp Mon May 6 14:57:02 2019
@@ -352,6 +352,40 @@ public:
};
+class SIGfx10CacheControl : public SIGfx7CacheControl {
+protected:
+ bool CuMode = false;
+
+ /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
+ /// is modified, false otherwise.
+ bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
+ return enableNamedBit<AMDGPU::OpName::dlc>(MI);
+ }
+
+public:
+
+ SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
+ SIGfx7CacheControl(ST), CuMode(CuMode) {};
+
+ bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const override;
+
+ bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
+
+ bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const override;
+
+ bool insertWait(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ SIMemOp Op,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const override;
+};
+
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
@@ -623,7 +657,9 @@ std::unique_ptr<SICacheControl> SICacheC
GCNSubtarget::Generation Generation = ST.getGeneration();
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return make_unique<SIGfx6CacheControl>(ST);
- return make_unique<SIGfx7CacheControl>(ST);
+ if (Generation < AMDGPUSubtarget::GFX10)
+ return make_unique<SIGfx7CacheControl>(ST);
+ return make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
}
bool SIGfx6CacheControl::enableLoadCacheBypass(
@@ -856,6 +892,231 @@ bool SIGfx7CacheControl::insertCacheInva
if (Pos == Position::AFTER)
--MI;
+
+ return Changed;
+}
+
+bool SIGfx10CacheControl::enableLoadCacheBypass(
+ const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const {
+ assert(MI->mayLoad() && !MI->mayStore());
+ bool Changed = false;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ /// TODO Do not set glc for rmw atomic operations as they
+ /// implicitly bypass the L0/L1 caches.
+
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ Changed |= enableGLCBit(MI);
+ Changed |= enableDLCBit(MI);
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // In WGP mode the waves of a work-group can be executing on either CU of
+ // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
+ // CU mode all waves of a work-group are on the same CU, and so the
+ // L0 does not need to be bypassed.
+ if (!CuMode) Changed |= enableGLCBit(MI);
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No cache to bypass.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory caches
+ /// to be bypassed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ return Changed;
+}
+
+bool SIGfx10CacheControl::enableNonTemporal(
+ const MachineBasicBlock::iterator &MI) const {
+ assert(MI->mayLoad() ^ MI->mayStore());
+ bool Changed = false;
+
+ Changed |= enableSLCBit(MI);
+ /// TODO for store (non-rmw atomic) instructions also enableGLCBit(MI)
+
+ return Changed;
+}
+
+bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
+ bool Changed = false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Pos == Position::AFTER)
+ ++MI;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
+ Changed = true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // In WGP mode the waves of a work-group can be executing on either CU of
+ // the WGP. Therefore need to invalidate the L0 which is per CU. Otherwise
+ // in CU mode all waves of a work-group are on the same CU, and so the
+ // L0 does not need to be invalidated.
+ if (!CuMode) {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
+ Changed = true;
+ }
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No cache to invalidate.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory cache
+ /// to be flushed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ if (Pos == Position::AFTER)
+ --MI;
+
+ return Changed;
+}
+
+bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ SIMemOp Op,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const {
+ bool Changed = false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Pos == Position::AFTER)
+ ++MI;
+
+ bool VMCnt = false;
+ bool VSCnt = false;
+ bool LGKMCnt = false;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
+ VMCnt |= true;
+ if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
+ VSCnt |= true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // In WGP mode the waves of a work-group can be executing on either CU of
+ // the WGP. Therefore need to wait for operations to complete to ensure
+ // they are visible to waves in the other CU as the L0 is per CU.
+ // Otherwise in CU mode all waves of a work-group are on the same CU
+ // which shares the same L0.
+ if (!CuMode) {
+ if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
+ VMCnt |= true;
+ if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
+ VSCnt |= true;
+ }
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // The L0 cache keeps all memory operations in order for
+ // work-items in the same wavefront.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ case SIAtomicScope::WORKGROUP:
+ // If no cross address space ordering then an LDS waitcnt is not
+ // needed as LDS operations for all waves are executed in a
+ // total global ordering as observed by all waves. Required if
+ // also synchronizing with global/GDS memory as LDS operations
+ // could be reordered with respect to later global/GDS memory
+ // operations of the same wave.
+ LGKMCnt |= IsCrossAddrSpaceOrdering;
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // The LDS keeps all memory operations in order for
+ // the same wavefront.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ // If no cross address space ordering then a GDS waitcnt is not
+ // needed as GDS operations for all waves are executed in a
+ // total global ordering as observed by all waves. Required if
+ // also synchronizing with global/LDS memory as GDS operations
+ // could be reordered with respect to later global/LDS memory
+ // operations of the same wave.
+ LGKMCnt |= IsCrossAddrSpaceOrdering;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // The GDS keeps all memory operations in order for
+ // the same work-group.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ if (VMCnt || LGKMCnt) {
+ unsigned WaitCntImmediate =
+ AMDGPU::encodeWaitcnt(IV,
+ VMCnt ? 0 : getVmcntBitMask(IV),
+ getExpcntBitMask(IV),
+ LGKMCnt ? 0 : getLgkmcntBitMask(IV));
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
+ Changed = true;
+ }
+
+ if (VSCnt) {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(0);
+ Changed = true;
+ }
+
+ if (Pos == Position::AFTER)
+ --MI;
return Changed;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll?rev=360087&r1=360086&r2=360087&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-cmpxchg.ll Mon May 6 14:57:02 2019
@@ -1,141 +1,20 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
-
-; GCN-LABEL: {{^}}system_monotonic_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @system_monotonic_monotonic(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic
- ret void
-}
-
-; GCN-LABEL: {{^}}system_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @system_acquire_monotonic(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
- ret void
-}
-
-; GCN-LABEL: {{^}}system_release_monotonic:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @system_release_monotonic(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic
- ret void
-}
-
-; GCN-LABEL: {{^}}system_acq_rel_monotonic:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @system_acq_rel_monotonic(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
- ret void
-}
-
-; GCN-LABEL: {{^}}system_seq_cst_monotonic:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @system_seq_cst_monotonic(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
- ret void
-}
-
-; GCN-LABEL: {{^}}system_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @system_acquire_acquire(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
- ret void
-}
-
-; GCN-LABEL: {{^}}system_release_acquire:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @system_release_acquire(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
- ret void
-}
-
-; GCN-LABEL: {{^}}system_acq_rel_acquire:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @system_acq_rel_acquire(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
- ret void
-}
-
-; GCN-LABEL: {{^}}system_seq_cst_acquire:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @system_seq_cst_acquire(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
- ret void
-}
-
-; GCN-LABEL: {{^}}system_seq_cst_seq_cst:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @system_seq_cst_seq_cst(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
- ret void
-}
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10CU %s
; GCN-LABEL: {{^}}system_one_as_monotonic_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel system_one_as_monotonic_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -146,9 +25,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_acquire_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acquire_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -159,9 +46,16 @@ entry:
; GCN-LABEL: {{^}}system_one_as_release_monotonic:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel system_one_as_release_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_release_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -172,9 +66,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_acq_rel_monotonic:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acq_rel_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -185,9 +87,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_seq_cst_monotonic:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_seq_cst_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -198,9 +108,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_acquire_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acquire_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -211,9 +129,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_release_acquire:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_release_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_release_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -224,9 +150,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_acq_rel_acquire:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acq_rel_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -237,9 +171,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_seq_cst_acquire:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_seq_cst_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -250,9 +192,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_seq_cst_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_seq_cst_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -261,141 +211,18 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @singlethread_monotonic_monotonic(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
- ret void
-}
-
-; GCN-LABEL: {{^}}singlethread_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @singlethread_acquire_monotonic(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
- ret void
-}
-
-; GCN-LABEL: {{^}}singlethread_release_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @singlethread_release_monotonic(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
- ret void
-}
-
-; GCN-LABEL: {{^}}singlethread_acq_rel_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @singlethread_acq_rel_monotonic(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
- ret void
-}
-
-; GCN-LABEL: {{^}}singlethread_seq_cst_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @singlethread_seq_cst_monotonic(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
- ret void
-}
-
-; GCN-LABEL: {{^}}singlethread_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @singlethread_acquire_acquire(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
- ret void
-}
-
-; GCN-LABEL: {{^}}singlethread_release_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @singlethread_release_acquire(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
- ret void
-}
-
-; GCN-LABEL: {{^}}singlethread_acq_rel_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @singlethread_acq_rel_acquire(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
- ret void
-}
-
-; GCN-LABEL: {{^}}singlethread_seq_cst_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @singlethread_seq_cst_acquire(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
- ret void
-}
-
-; GCN-LABEL: {{^}}singlethread_seq_cst_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
- i32* %out, i32 %in, i32 %old) {
-entry:
- %gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
- ret void
-}
-
; GCN-LABEL: {{^}}singlethread_one_as_monotonic_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_one_as_monotonic_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -406,9 +233,16 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_acquire_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_one_as_acquire_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -419,9 +253,16 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_release_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GCN-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_one_as_release_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_release_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -432,9 +273,16 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_acq_rel_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -445,9 +293,16 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -458,9 +313,16 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_acquire_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_one_as_acquire_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -471,9 +333,16 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_release_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_one_as_release_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_release_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -484,9 +353,16 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_acq_rel_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -497,9 +373,16 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -510,9 +393,16 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -521,531 +411,2112 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}agent_monotonic_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}agent_one_as_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_monotonic_monotonic(
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel agent_one_as_monotonic_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
ret void
}
-; GCN-LABEL: {{^}}agent_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}agent_one_as_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_acquire_monotonic(
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acquire_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
ret void
}
-; GCN-LABEL: {{^}}agent_release_monotonic:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}agent_one_as_release_monotonic:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_release_monotonic(
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel agent_one_as_release_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_release_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic
ret void
}
-; GCN-LABEL: {{^}}agent_acq_rel_monotonic:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}agent_one_as_acq_rel_monotonic:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_acq_rel_monotonic(
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acq_rel_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
ret void
}
-; GCN-LABEL: {{^}}agent_seq_cst_monotonic:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}agent_one_as_seq_cst_monotonic:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_seq_cst_monotonic(
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
ret void
}
-; GCN-LABEL: {{^}}agent_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}agent_one_as_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_acquire_acquire(
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acquire_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
ret void
}
-; GCN-LABEL: {{^}}agent_release_acquire:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}agent_one_as_release_acquire:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_release_acquire(
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_release_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_release_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
ret void
}
-; GCN-LABEL: {{^}}agent_acq_rel_acquire:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}agent_one_as_acq_rel_acquire:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_acq_rel_acquire(
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acq_rel_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
ret void
}
-; GCN-LABEL: {{^}}agent_seq_cst_acquire:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}agent_one_as_seq_cst_acquire:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_seq_cst_acquire(
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
ret void
}
-; GCN-LABEL: {{^}}agent_seq_cst_seq_cst:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}agent_one_as_seq_cst_seq_cst:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_seq_cst_seq_cst(
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
ret void
}
-; GCN-LABEL: {{^}}agent_one_as_monotonic_monotonic:
+; GCN-LABEL: {{^}}workgroup_one_as_monotonic_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_one_as_monotonic_monotonic(
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_monotonic_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic
ret void
}
-; GCN-LABEL: {{^}}agent_one_as_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_one_as_acquire_monotonic(
+; GCN-LABEL: {{^}}workgroup_one_as_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10: .amdhsa_kernel workgroup_one_as_acquire_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
ret void
}
-; GCN-LABEL: {{^}}agent_one_as_release_monotonic:
-; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_one_as_release_monotonic(
+; GCN-LABEL: {{^}}workgroup_one_as_release_monotonic:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel workgroup_one_as_release_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_release_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic
ret void
}
-; GCN-LABEL: {{^}}agent_one_as_acq_rel_monotonic:
-; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_one_as_acq_rel_monotonic(
+; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_monotonic:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
ret void
}
-; GCN-LABEL: {{^}}agent_one_as_seq_cst_monotonic:
-; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_one_as_seq_cst_monotonic(
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_monotonic:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
ret void
}
-; GCN-LABEL: {{^}}agent_one_as_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_one_as_acquire_acquire(
+; GCN-LABEL: {{^}}workgroup_one_as_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10: .amdhsa_kernel workgroup_one_as_acquire_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
ret void
}
-; GCN-LABEL: {{^}}agent_one_as_release_acquire:
-; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_one_as_release_acquire(
+; GCN-LABEL: {{^}}workgroup_one_as_release_acquire:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_release_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_release_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
ret void
}
-; GCN-LABEL: {{^}}agent_one_as_acq_rel_acquire:
-; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_one_as_acq_rel_acquire(
+; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_acquire:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
ret void
}
-; GCN-LABEL: {{^}}agent_one_as_seq_cst_acquire:
-; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_one_as_seq_cst_acquire(
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_acquire:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
ret void
}
-; GCN-LABEL: {{^}}agent_one_as_seq_cst_seq_cst:
-; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NEXT: buffer_wbinvl1_vol
-define amdgpu_kernel void @agent_one_as_seq_cst_seq_cst(
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_seq_cst:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
ret void
}
-; GCN-LABEL: {{^}}workgroup_monotonic_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-LABEL: {{^}}wavefront_one_as_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_monotonic_monotonic(
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_one_as_monotonic_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @wavefront_one_as_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic
ret void
}
-; GCN-LABEL: {{^}}workgroup_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_acquire_monotonic(
+; GCN-LABEL: {{^}}wavefront_one_as_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_one_as_acquire_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @wavefront_one_as_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic
ret void
}
-; GCN-LABEL: {{^}}workgroup_release_monotonic:
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_release_monotonic(
+; GCN-LABEL: {{^}}wavefront_one_as_release_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_one_as_release_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @wavefront_one_as_release_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic
ret void
}
-; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic:
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_acq_rel_monotonic(
+; GCN-LABEL: {{^}}wavefront_one_as_acq_rel_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @wavefront_one_as_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic
ret void
}
-; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic:
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_seq_cst_monotonic(
+; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @wavefront_one_as_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic
ret void
}
-; GCN-LABEL: {{^}}workgroup_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_acquire_acquire(
+; GCN-LABEL: {{^}}wavefront_one_as_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_one_as_acquire_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @wavefront_one_as_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire
ret void
}
-; GCN-LABEL: {{^}}workgroup_release_acquire:
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_release_acquire(
+; GCN-LABEL: {{^}}wavefront_one_as_release_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_one_as_release_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @wavefront_one_as_release_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire
ret void
}
-; GCN-LABEL: {{^}}workgroup_acq_rel_acquire:
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_acq_rel_acquire(
+; GCN-LABEL: {{^}}wavefront_one_as_acq_rel_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @wavefront_one_as_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire
ret void
}
-; GCN-LABEL: {{^}}workgroup_seq_cst_acquire:
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_seq_cst_acquire(
+; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @wavefront_one_as_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire
ret void
}
-; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst:
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
+; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_seq_cst:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @wavefront_one_as_seq_cst_seq_cst(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acquire_monotonic_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acquire_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_acquire_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acq_rel_monotonic_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acq_rel_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_acq_rel_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_seq_cst_monotonic_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_seq_cst_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_seq_cst_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acquire_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acquire_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_acquire_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_release_acquire_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_release_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_release_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") release acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acq_rel_acquire_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acq_rel_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_acq_rel_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_seq_cst_acquire_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_seq_cst_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_seq_cst_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_seq_cst_seq_cst_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_seq_cst_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_seq_cst_seq_cst_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acquire_monotonic_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acquire_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_acquire_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acq_rel_monotonic_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acq_rel_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_acq_rel_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_seq_cst_monotonic_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_seq_cst_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acquire_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acquire_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_acquire_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_release_acquire_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_release_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_release_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acq_rel_acquire_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acq_rel_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_acq_rel_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_seq_cst_acquire_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_seq_cst_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_seq_cst_seq_cst_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_seq_cst_seq_cst_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acquire_monotonic_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10: .amdhsa_kernel workgroup_one_as_acquire_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_acquire_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_monotonic_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_acq_rel_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_monotonic_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_seq_cst_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acquire_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10: .amdhsa_kernel workgroup_one_as_acquire_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_acquire_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_release_acquire_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_release_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_release_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_acquire_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_acq_rel_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_acquire_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_seq_cst_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_seq_cst_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_seq_cst_seq_cst_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel system_monotonic_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_monotonic_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in monotonic monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acquire_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_acquire_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_release_monotonic:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel system_release_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_release_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_acq_rel_monotonic:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acq_rel_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_acq_rel_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_seq_cst_monotonic:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_seq_cst_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_seq_cst_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acquire_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_acquire_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_release_acquire:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_release_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_release_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_acq_rel_acquire:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acq_rel_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_acq_rel_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_seq_cst_acquire:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_seq_cst_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_seq_cst_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_seq_cst_seq_cst:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_seq_cst_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_seq_cst_seq_cst(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_monotonic_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @singlethread_monotonic_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_acquire_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @singlethread_acquire_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_release_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GCN-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_release_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @singlethread_release_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_acq_rel_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_acq_rel_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @singlethread_acq_rel_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_seq_cst_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_seq_cst_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @singlethread_seq_cst_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_acquire_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @singlethread_acquire_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_release_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_release_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @singlethread_release_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_acq_rel_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_acq_rel_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @singlethread_acq_rel_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_seq_cst_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_seq_cst_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @singlethread_seq_cst_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}singlethread_seq_cst_seq_cst:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel singlethread_seq_cst_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @singlethread_seq_cst_seq_cst(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_monotonic_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel agent_monotonic_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_monotonic_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acquire_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_acquire_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_release_monotonic:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel agent_release_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_release_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_acq_rel_monotonic:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acq_rel_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_acq_rel_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_seq_cst_monotonic:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_seq_cst_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_seq_cst_monotonic(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acquire_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_acquire_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_release_acquire:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_release_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_release_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_acq_rel_acquire:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acq_rel_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_acq_rel_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_seq_cst_acquire:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_seq_cst_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_seq_cst_acquire(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_seq_cst_seq_cst:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_seq_cst_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_monotonic_monotonic:
+; GCN-LABEL: {{^}}workgroup_monotonic_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_one_as_monotonic_monotonic(
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel workgroup_monotonic_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_one_as_acquire_monotonic(
+; GCN-LABEL: {{^}}workgroup_acquire_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10: .amdhsa_kernel workgroup_acquire_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_release_monotonic:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_one_as_release_monotonic(
+; GCN-LABEL: {{^}}workgroup_release_monotonic:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel workgroup_release_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_release_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_monotonic:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_one_as_acq_rel_monotonic(
+; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_acq_rel_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_monotonic:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_one_as_seq_cst_monotonic(
+; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_seq_cst_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_one_as_acquire_acquire(
+; GCN-LABEL: {{^}}workgroup_acquire_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10: .amdhsa_kernel workgroup_acquire_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_release_acquire:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_one_as_release_acquire(
+; GCN-LABEL: {{^}}workgroup_release_acquire:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_release_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_release_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_acquire:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_one_as_acq_rel_acquire(
+; GCN-LABEL: {{^}}workgroup_acq_rel_acquire:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_acq_rel_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_acquire:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_one_as_seq_cst_acquire(
+; GCN-LABEL: {{^}}workgroup_seq_cst_acquire:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_seq_cst_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_seq_cst:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @workgroup_one_as_seq_cst_seq_cst(
+; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_seq_cst_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
ret void
}
; GCN-LABEL: {{^}}wavefront_monotonic_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_monotonic_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_monotonic_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -1055,10 +2526,17 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_acquire_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acquire_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -1068,10 +2546,17 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_release_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_release_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_release_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -1081,10 +2566,17 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acq_rel_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_acq_rel_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acq_rel_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -1094,10 +2586,17 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_seq_cst_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst_monotonic(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -1107,10 +2606,17 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_acquire_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acquire_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -1120,10 +2626,17 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_release_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_release_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_release_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -1133,10 +2646,17 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acq_rel_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_acq_rel_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acq_rel_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -1146,10 +2666,17 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_seq_cst_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst_acquire(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -1159,10 +2686,17 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GFX10: .amdhsa_kernel wavefront_seq_cst_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst_seq_cst(
i32* %out, i32 %in, i32 %old) {
entry:
@@ -1171,132 +2705,588 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}wavefront_one_as_monotonic_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @wavefront_one_as_monotonic_monotonic(
+; GCN-LABEL: {{^}}system_acquire_monotonic_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acquire_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_acquire_monotonic_ret(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_one_as_acquire_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @wavefront_one_as_acquire_monotonic(
+; GCN-LABEL: {{^}}system_acq_rel_monotonic_ret:
+; GCN: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acq_rel_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_acq_rel_monotonic_ret(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_one_as_release_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @wavefront_one_as_release_monotonic(
+; GCN-LABEL: {{^}}system_seq_cst_monotonic_ret:
+; GCN: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_seq_cst_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_seq_cst_monotonic_ret(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_one_as_acq_rel_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @wavefront_one_as_acq_rel_monotonic(
+; GCN-LABEL: {{^}}system_acquire_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acquire_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_acquire_acquire_ret(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acquire acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @wavefront_one_as_seq_cst_monotonic(
+; GCN-LABEL: {{^}}system_release_acquire_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_release_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_release_acquire_ret(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in release acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_one_as_acquire_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @wavefront_one_as_acquire_acquire(
+; GCN-LABEL: {{^}}system_acq_rel_acquire_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acq_rel_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_acq_rel_acquire_ret(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in acq_rel acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_one_as_release_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @wavefront_one_as_release_acquire(
+; GCN-LABEL: {{^}}system_seq_cst_acquire_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_seq_cst_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_seq_cst_acquire_ret(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_one_as_acq_rel_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @wavefront_one_as_acq_rel_acquire(
+; GCN-LABEL: {{^}}system_seq_cst_seq_cst_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_seq_cst_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_seq_cst_seq_cst_ret(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @wavefront_one_as_seq_cst_acquire(
+; GCN-LABEL: {{^}}agent_acquire_monotonic_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acquire_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_acquire_monotonic_ret(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
ret void
}
-; GCN-LABEL: {{^}}wavefront_one_as_seq_cst_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
-define amdgpu_kernel void @wavefront_one_as_seq_cst_seq_cst(
+; GCN-LABEL: {{^}}agent_acq_rel_monotonic_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acq_rel_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_acq_rel_monotonic_ret(
i32* %out, i32 %in, i32 %old) {
entry:
%gep = getelementptr i32, i32* %out, i32 4
- %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_seq_cst_monotonic_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_seq_cst_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_seq_cst_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_acquire_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acquire_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_acquire_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_release_acquire_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_release_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_release_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") release acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_acq_rel_acquire_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acq_rel_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_acq_rel_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_seq_cst_acquire_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_seq_cst_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_seq_cst_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_seq_cst_seq_cst_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_seq_cst_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_seq_cst_seq_cst_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_acquire_monotonic_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10: .amdhsa_kernel workgroup_acquire_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_acquire_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_acq_rel_monotonic_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_acq_rel_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_acq_rel_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_seq_cst_monotonic_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_seq_cst_monotonic_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_seq_cst_monotonic_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_acquire_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10: .amdhsa_kernel workgroup_acquire_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_acquire_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_release_acquire_ret:
+; GFX8: s_waitcnt lgkmcnt(0){{$}}
+; GFX8: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX8: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_release_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_release_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_acq_rel_acquire_ret:
+; GFX8: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_acq_rel_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_acq_rel_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_seq_cst_acquire_ret:
+; GFX8: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_seq_cst_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_seq_cst_acquire_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_seq_cst_seq_cst_ret:
+; GFX8: s_waitcnt lgkmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{( offset:[0-9]+)*}} glc{{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_seq_cst_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_seq_cst_seq_cst_ret(
+ i32* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32* %out, i32 4
+ %val = cmpxchg volatile i32* %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
+ %val0 = extractvalue { i32, i1 } %val, 0
+ store i32 %val0, i32* %out, align 4
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll?rev=360087&r1=360086&r2=360087&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll Mon May 6 14:57:02 2019
@@ -1,6 +1,8 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX6,GFX68 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10CU %s
; FUNC-LABEL: {{^}}system_one_as_acquire:
; GCN: %bb.0
@@ -9,7 +11,15 @@
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
-; GCN: s_endpgm
+; GFX10: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel system_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acquire() {
entry:
fence syncscope("one-as") acquire
@@ -20,7 +30,12 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel system_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_release() {
entry:
fence syncscope("one-as") release
@@ -31,9 +46,16 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel system_one_as_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acq_rel() {
entry:
fence syncscope("one-as") acq_rel
@@ -44,9 +66,16 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel system_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_seq_cst() {
entry:
fence syncscope("one-as") seq_cst
@@ -57,6 +86,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel singlethread_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acquire() {
entry:
fence syncscope("singlethread-one-as") acquire
@@ -67,6 +100,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel singlethread_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_release() {
entry:
fence syncscope("singlethread-one-as") release
@@ -77,6 +114,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acq_rel() {
entry:
fence syncscope("singlethread-one-as") acq_rel
@@ -87,6 +128,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_seq_cst() {
entry:
fence syncscope("singlethread-one-as") seq_cst
@@ -100,7 +145,15 @@ entry:
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
-; GCN: s_endpgm
+; GFX10: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel agent_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_acquire() {
entry:
fence syncscope("agent-one-as") acquire
@@ -111,7 +164,12 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel agent_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_release() {
entry:
fence syncscope("agent-one-as") release
@@ -122,9 +180,16 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel agent_one_as_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:
fence syncscope("agent-one-as") acq_rel
@@ -135,53 +200,99 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:
fence syncscope("agent-one-as") seq_cst
ret void
}
-; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
-; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: ATOMIC_FENCE
-; GCN: s_endpgm
+; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
+; GFX10CU-NOT: buffer_gl0_inv{{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel workgroup_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_acquire() {
entry:
fence syncscope("workgroup-one-as") acquire
ret void
}
-; FUNC-LABEL: {{^}}workgroup_one_as_release:
-; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: ATOMIC_FENCE
-; GCN: s_endpgm
+; FUNC-LABEL: {{^}}workgroup_one_as_release:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10-NOT: buffer_gl0_inv
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel workgroup_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_release() {
entry:
fence syncscope("workgroup-one-as") release
ret void
}
-; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
-; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: ATOMIC_FENCE
-; GCN: s_endpgm
+; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: buffer_gl0_inv{{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_acq_rel() {
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
}
-; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
-; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: ATOMIC_FENCE
-; GCN: s_endpgm
+; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: buffer_gl0_inv{{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_seq_cst() {
entry:
fence syncscope("workgroup-one-as") seq_cst
@@ -192,6 +303,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel wavefront_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_acquire() {
entry:
fence syncscope("wavefront-one-as") acquire
@@ -202,6 +317,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel wavefront_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_release() {
entry:
fence syncscope("wavefront-one-as") release
@@ -212,6 +331,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_acq_rel() {
entry:
fence syncscope("wavefront-one-as") acq_rel
@@ -222,6 +345,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_seq_cst() {
entry:
fence syncscope("wavefront-one-as") seq_cst
@@ -235,7 +362,15 @@ entry:
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
-; GCN: s_endpgm
+; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel system_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acquire() {
entry:
fence acquire
@@ -245,8 +380,15 @@ entry:
; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel system_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_release() {
entry:
fence release
@@ -256,10 +398,19 @@ entry:
; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel system_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acq_rel() {
entry:
fence acq_rel
@@ -269,10 +420,19 @@ entry:
; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel system_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_seq_cst() {
entry:
fence seq_cst
@@ -283,6 +443,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel singlethread_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_acquire() {
entry:
fence syncscope("singlethread") acquire
@@ -293,6 +457,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel singlethread_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_release() {
entry:
fence syncscope("singlethread") release
@@ -303,6 +471,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel singlethread_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_acq_rel() {
entry:
fence syncscope("singlethread") acq_rel
@@ -313,6 +485,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel singlethread_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_seq_cst() {
entry:
fence syncscope("singlethread") seq_cst
@@ -326,7 +502,15 @@ entry:
; GFX6-NEXT: buffer_wbinvl1{{$}}
; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX8-NEXT: buffer_wbinvl1_vol{{$}}
-; GCN: s_endpgm
+; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel agent_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acquire() {
entry:
fence syncscope("agent") acquire
@@ -336,8 +520,15 @@ entry:
; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel agent_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_release() {
entry:
fence syncscope("agent") release
@@ -347,10 +538,19 @@ entry:
; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel agent_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acq_rel() {
entry:
fence syncscope("agent") acq_rel
@@ -360,54 +560,102 @@ entry:
; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX6: buffer_wbinvl1{{$}}
; GFX8: buffer_wbinvl1_vol{{$}}
+; GFX10-NEXT: buffer_gl0_inv{{$}}
+; GFX10-NEXT: buffer_gl1_inv{{$}}
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel agent_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_seq_cst() {
entry:
fence syncscope("agent") seq_cst
ret void
}
-; FUNC-LABEL: {{^}}workgroup_acquire:
-; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: ATOMIC_FENCE
-; GCN: s_endpgm
+; FUNC-LABEL: {{^}}workgroup_acquire:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
+; GFX10CU-NOT: buffer_gl0_inv{{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel workgroup_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_acquire() {
entry:
fence syncscope("workgroup") acquire
ret void
}
-; FUNC-LABEL: {{^}}workgroup_release:
-; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: ATOMIC_FENCE
-; GCN: s_endpgm
+; FUNC-LABEL: {{^}}workgroup_release:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10-NOT: buffer_gl0_inv
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel workgroup_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_release() {
entry:
fence syncscope("workgroup") release
ret void
}
-; FUNC-LABEL: {{^}}workgroup_acq_rel:
-; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: ATOMIC_FENCE
-; GCN: s_endpgm
+; FUNC-LABEL: {{^}}workgroup_acq_rel:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: buffer_gl0_inv{{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel workgroup_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_acq_rel() {
entry:
fence syncscope("workgroup") acq_rel
ret void
}
-; FUNC-LABEL: {{^}}workgroup_seq_cst:
-; GCN: %bb.0
-; GFX68-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: ATOMIC_FENCE
-; GCN: s_endpgm
+; FUNC-LABEL: {{^}}workgroup_seq_cst:
+; GCN: %bb.0
+; GFX68-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10WGP-NEXT: buffer_gl0_inv{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: buffer_gl0_inv{{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+; GFX10: .amdhsa_kernel workgroup_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_seq_cst() {
entry:
fence syncscope("workgroup") seq_cst
@@ -418,6 +666,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel wavefront_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acquire() {
entry:
fence syncscope("wavefront") acquire
@@ -428,6 +680,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel wavefront_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_release() {
entry:
fence syncscope("wavefront") release
@@ -438,6 +694,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel wavefront_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acq_rel() {
entry:
fence syncscope("wavefront") acq_rel
@@ -448,6 +708,10 @@ entry:
; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm
+; GFX10: .amdhsa_kernel wavefront_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst() {
entry:
fence syncscope("wavefront") seq_cst
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll?rev=360087&r1=360086&r2=360087&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-rmw.ll Mon May 6 14:57:02 2019
@@ -1,11 +1,19 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10CU %s
; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel system_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_monotonic(
i32* %out, i32 %in) {
entry:
@@ -15,9 +23,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acquire(
i32* %out, i32 %in) {
entry:
@@ -27,9 +43,15 @@ entry:
; GCN-LABEL: {{^}}system_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel system_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_release(
i32* %out, i32 %in) {
entry:
@@ -39,9 +61,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_acq_rel:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
@@ -51,9 +81,17 @@ entry:
; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
@@ -63,9 +101,15 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel singlethread_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_monotonic(
i32* %out, i32 %in) {
entry:
@@ -75,9 +119,15 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel singlethread_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acquire(
i32* %out, i32 %in) {
entry:
@@ -87,9 +137,15 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel singlethread_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_release(
i32* %out, i32 %in) {
entry:
@@ -99,9 +155,15 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_acq_rel:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
@@ -111,9 +173,15 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
@@ -123,9 +191,15 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel agent_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_monotonic(
i32* %out, i32 %in) {
entry:
@@ -135,9 +209,17 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_acquire(
i32* %out, i32 %in) {
entry:
@@ -147,9 +229,15 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel agent_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_release(
i32* %out, i32 %in) {
entry:
@@ -159,9 +247,17 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_acq_rel:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
@@ -171,9 +267,17 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
@@ -183,9 +287,15 @@ entry:
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel workgroup_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_monotonic(
i32* %out, i32 %in) {
entry:
@@ -193,11 +303,20 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
+; GCN-LABEL: {{^}}workgroup_one_as_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_acquire(
i32* %out, i32 %in) {
entry:
@@ -205,11 +324,20 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_release:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-LABEL: {{^}}workgroup_one_as_release:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel workgroup_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_release(
i32* %out, i32 %in) {
entry:
@@ -217,11 +345,23 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_acq_rel:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
+; GCN-LABEL: {{^}}workgroup_one_as_acq_rel:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
@@ -229,11 +369,23 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
-; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
@@ -243,9 +395,15 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel wavefront_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_monotonic(
i32* %out, i32 %in) {
entry:
@@ -255,9 +413,15 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel wavefront_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_acquire(
i32* %out, i32 %in) {
entry:
@@ -267,9 +431,15 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel wavefront_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_release(
i32* %out, i32 %in) {
entry:
@@ -279,9 +449,15 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_acq_rel:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_acq_rel(
i32* %out, i32 %in) {
entry:
@@ -291,9 +467,15 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32* %out, i32 %in) {
entry:
@@ -301,11 +483,209 @@ entry:
ret void
}
+; GCN-LABEL: {{^}}system_one_as_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_acquire_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acquire
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_acq_rel_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_acq_rel_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_acq_rel_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") acq_rel
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_one_as_seq_cst_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_one_as_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_one_as_seq_cst_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("one-as") seq_cst
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_acquire_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acquire
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_acq_rel_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_acq_rel_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_acq_rel_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") acq_rel
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_one_as_seq_cst_ret:
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_one_as_seq_cst_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent-one-as") seq_cst
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_acquire_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acquire
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_acq_rel_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_acq_rel_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") acq_rel
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_one_as_seq_cst_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup-one-as") seq_cst
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
; GCN-LABEL: {{^}}system_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel system_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_monotonic(
i32* %out, i32 %in) {
entry:
@@ -314,10 +694,19 @@ entry:
}
; GCN-LABEL: {{^}}system_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acquire(
i32* %out, i32 %in) {
entry:
@@ -326,10 +715,17 @@ entry:
}
; GCN-LABEL: {{^}}system_release:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel system_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_release(
i32* %out, i32 %in) {
entry:
@@ -338,10 +734,20 @@ entry:
}
; GCN-LABEL: {{^}}system_acq_rel:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acq_rel(
i32* %out, i32 %in) {
entry:
@@ -350,10 +756,20 @@ entry:
}
; GCN-LABEL: {{^}}system_seq_cst:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_seq_cst(
i32* %out, i32 %in) {
entry:
@@ -362,10 +778,16 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel singlethread_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_monotonic(
i32* %out, i32 %in) {
entry:
@@ -374,10 +796,16 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel singlethread_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_acquire(
i32* %out, i32 %in) {
entry:
@@ -386,10 +814,16 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_release:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel singlethread_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_release(
i32* %out, i32 %in) {
entry:
@@ -398,10 +832,16 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acq_rel:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel singlethread_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_acq_rel(
i32* %out, i32 %in) {
entry:
@@ -410,10 +850,16 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel singlethread_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_seq_cst(
i32* %out, i32 %in) {
entry:
@@ -422,10 +868,16 @@ entry:
}
; GCN-LABEL: {{^}}agent_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel agent_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_monotonic(
i32* %out, i32 %in) {
entry:
@@ -434,10 +886,19 @@ entry:
}
; GCN-LABEL: {{^}}agent_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acquire(
i32* %out, i32 %in) {
entry:
@@ -446,10 +907,17 @@ entry:
}
; GCN-LABEL: {{^}}agent_release:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel agent_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_release(
i32* %out, i32 %in) {
entry:
@@ -458,10 +926,20 @@ entry:
}
; GCN-LABEL: {{^}}agent_acq_rel:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acq_rel(
i32* %out, i32 %in) {
entry:
@@ -470,10 +948,20 @@ entry:
}
; GCN-LABEL: {{^}}agent_seq_cst:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt lgkmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_seq_cst(
i32* %out, i32 %in) {
entry:
@@ -482,10 +970,16 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel workgroup_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_monotonic(
i32* %out, i32 %in) {
entry:
@@ -493,11 +987,20 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
+; GCN-LABEL: {{^}}workgroup_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_acquire(
i32* %out, i32 %in) {
entry:
@@ -505,11 +1008,20 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_release:
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-LABEL: {{^}}workgroup_release:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel workgroup_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_release(
i32* %out, i32 %in) {
entry:
@@ -517,11 +1029,23 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_acq_rel:
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
+; GCN-LABEL: {{^}}workgroup_acq_rel:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_acq_rel(
i32* %out, i32 %in) {
entry:
@@ -529,11 +1053,23 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_seq_cst:
-; GFX8-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX8-NOT: buffer_wbinvl1_vol
+; GCN-LABEL: {{^}}workgroup_seq_cst:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_seq_cst(
i32* %out, i32 %in) {
entry:
@@ -542,10 +1078,16 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel wavefront_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_monotonic(
i32* %out, i32 %in) {
entry:
@@ -554,10 +1096,16 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel wavefront_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acquire(
i32* %out, i32 %in) {
entry:
@@ -566,10 +1114,16 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_release:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel wavefront_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_release(
i32* %out, i32 %in) {
entry:
@@ -578,10 +1132,16 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acq_rel:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel wavefront_acq_rel
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acq_rel(
i32* %out, i32 %in) {
entry:
@@ -590,13 +1150,221 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NOT: buffer_wbinvl1_vol
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN-NOT: buffer_{{wbinvl1_vol|gl._inv}}
+; GFX10: .amdhsa_kernel wavefront_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst(
i32* %out, i32 %in) {
entry:
%val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("wavefront") seq_cst
ret void
}
+
+; GCN-LABEL: {{^}}system_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_acquire_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in acquire
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_acq_rel_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_acq_rel_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_acq_rel_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in acq_rel
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}system_seq_cst_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel system_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @system_seq_cst_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in seq_cst
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_acquire_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acquire
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_acq_rel_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_acq_rel_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_acq_rel_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") acq_rel
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}agent_seq_cst_ret:
+; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN-NEXT: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX8-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GFX10: .amdhsa_kernel agent_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @agent_seq_cst_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("agent") seq_cst
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_acquire_ret:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_acquire_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_acquire_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acquire
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_acq_rel_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_acq_rel_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_acq_rel_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") acq_rel
+ store i32 %val, i32* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}workgroup_seq_cst_ret:
+; GFX8-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX8-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GFX10: .amdhsa_kernel workgroup_seq_cst_ret
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
+define amdgpu_kernel void @workgroup_seq_cst_ret(
+ i32* %out, i32 %in) {
+entry:
+ %val = atomicrmw volatile xchg i32* %out, i32 %in syncscope("workgroup") seq_cst
+ store i32 %val, i32* %out, align 4
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll?rev=360087&r1=360086&r2=360087&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-syncscope.ll Mon May 6 14:57:02 2019
@@ -1,5 +1,6 @@
; RUN: not llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s 2>&1 | FileCheck %s
; CHECK: error: <unknown>:0:0: in function invalid_fence void (): Unsupported atomic synchronization scope
define amdgpu_kernel void @invalid_fence() {
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll?rev=360087&r1=360086&r2=360087&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll Mon May 6 14:57:02 2019
@@ -2,15 +2,24 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s
declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}system_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel system_one_as_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_unordered(
i32* %in, i32* %out) {
entry:
@@ -21,10 +30,18 @@ entry:
; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel system_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_monotonic(
i32* %in, i32* %out) {
entry:
@@ -35,10 +52,18 @@ entry:
; GCN-LABEL: {{^}}system_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel system_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_acquire(
i32* %in, i32* %out) {
entry:
@@ -49,10 +74,18 @@ entry:
; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel system_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
@@ -63,10 +96,17 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel singlethread_one_as_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_unordered(
i32* %in, i32* %out) {
entry:
@@ -77,10 +117,17 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel singlethread_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_monotonic(
i32* %in, i32* %out) {
entry:
@@ -91,10 +138,17 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel singlethread_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_acquire(
i32* %in, i32* %out) {
entry:
@@ -105,10 +159,17 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
@@ -119,10 +180,17 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel agent_one_as_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_unordered(
i32* %in, i32* %out) {
entry:
@@ -133,10 +201,18 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel agent_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_monotonic(
i32* %in, i32* %out) {
entry:
@@ -147,10 +223,18 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel agent_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_acquire(
i32* %in, i32* %out) {
entry:
@@ -161,10 +245,18 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
-; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
@@ -175,10 +267,17 @@ entry:
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel workgroup_one_as_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_unordered(
i32* %in, i32* %out) {
entry:
@@ -187,12 +286,21 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX89-NOT: buffer_wbinvl1_vol
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10CU-NOT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel workgroup_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_monotonic(
i32* %in, i32* %out) {
entry:
@@ -201,12 +309,23 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX89-NOT: buffer_wbinvl1_vol
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GCN-LABEL: {{^}}workgroup_one_as_acquire:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10CU-NOT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel workgroup_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_acquire(
i32* %in, i32* %out) {
entry:
@@ -215,12 +334,26 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
-; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
-; GFX89-NOT: buffer_wbinvl1_vol
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
+; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10CU: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
@@ -231,10 +364,17 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel wavefront_one_as_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_unordered(
i32* %in, i32* %out) {
entry:
@@ -245,10 +385,17 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel wavefront_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_monotonic(
i32* %in, i32* %out) {
entry:
@@ -259,10 +406,17 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_acquire:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel wavefront_one_as_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_acquire(
i32* %in, i32* %out) {
entry:
@@ -273,10 +427,17 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32* %in, i32* %out) {
entry:
@@ -287,6 +448,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_private_0:
; GFX89: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
+; GFX10: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_private_0
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_private_0(
i32 addrspace(5)* %in, i32* %out) {
entry:
@@ -297,6 +463,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_private_1:
; GFX89: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
+; GFX10: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_private_1
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_private_1(
i32 addrspace(5)* %in, i32* %out) {
entry:
@@ -309,6 +480,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_global_0:
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}}
+; GFX10: .amdhsa_kernel nontemporal_global_0
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_global_0(
i32 addrspace(1)* %in, i32* %out) {
entry:
@@ -320,6 +495,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_global_1:
; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
+; GFX10: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_global_1
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_global_1(
i32 addrspace(1)* %in, i32* %out) {
entry:
@@ -332,6 +512,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_local_0:
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel nontemporal_local_0
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_local_0(
i32 addrspace(3)* %in, i32* %out) {
entry:
@@ -342,6 +526,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_local_1:
; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel nontemporal_local_1
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_local_1(
i32 addrspace(3)* %in, i32* %out) {
entry:
@@ -354,6 +542,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_flat_0:
; GFX89: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
+; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_flat_0
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_flat_0(
i32* %in, i32* %out) {
entry:
@@ -364,6 +557,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_flat_1:
; GFX89: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
+; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_flat_1
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_flat_1(
i32* %in, i32* %out) {
entry:
@@ -375,11 +573,18 @@ entry:
}
; GCN-LABEL: {{^}}system_unordered:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel system_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_unordered(
i32* %in, i32* %out) {
entry:
@@ -389,11 +594,19 @@ entry:
}
; GCN-LABEL: {{^}}system_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel system_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_monotonic(
i32* %in, i32* %out) {
entry:
@@ -403,11 +616,20 @@ entry:
}
; GCN-LABEL: {{^}}system_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
+; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel system_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_acquire(
i32* %in, i32* %out) {
entry:
@@ -417,11 +639,21 @@ entry:
}
; GCN-LABEL: {{^}}system_seq_cst:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
+; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel system_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_seq_cst(
i32* %in, i32* %out) {
entry:
@@ -431,11 +663,18 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_unordered:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel singlethread_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_unordered(
i32* %in, i32* %out) {
entry:
@@ -445,11 +684,18 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel singlethread_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_monotonic(
i32* %in, i32* %out) {
entry:
@@ -459,11 +705,18 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel singlethread_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_acquire(
i32* %in, i32* %out) {
entry:
@@ -473,11 +726,18 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel singlethread_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_seq_cst(
i32* %in, i32* %out) {
entry:
@@ -487,11 +747,18 @@ entry:
}
; GCN-LABEL: {{^}}agent_unordered:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel agent_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_unordered(
i32* %in, i32* %out) {
entry:
@@ -501,11 +768,19 @@ entry:
}
; GCN-LABEL: {{^}}agent_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel agent_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_monotonic(
i32* %in, i32* %out) {
entry:
@@ -515,11 +790,20 @@ entry:
}
; GCN-LABEL: {{^}}agent_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
+; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel agent_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_acquire(
i32* %in, i32* %out) {
entry:
@@ -529,11 +813,21 @@ entry:
}
; GCN-LABEL: {{^}}agent_seq_cst:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX89-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc dlc{{$}}
+; GFX89-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GFX89-NEXT: buffer_wbinvl1_vol
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10-NEXT: buffer_gl0_inv
+; GFX10-NEXT: buffer_gl1_inv
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel agent_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_seq_cst(
i32* %in, i32* %out) {
entry:
@@ -543,11 +837,18 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_unordered:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel workgroup_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_unordered(
i32* %in, i32* %out) {
entry:
@@ -556,12 +857,21 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX89-NOT: buffer_wbinvl1_vol
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GCN-LABEL: {{^}}workgroup_monotonic:
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10CU-NOT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel workgroup_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_monotonic(
i32* %in, i32* %out) {
entry:
@@ -570,12 +880,21 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX89-NOT: buffer_wbinvl1_vol
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GCN-LABEL: {{^}}workgroup_acquire:
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10CU-NOT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel workgroup_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_acquire(
i32* %in, i32* %out) {
entry:
@@ -584,12 +903,25 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_seq_cst:
-; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GFX89-NOT: buffer_wbinvl1_vol
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GCN-LABEL: {{^}}workgroup_seq_cst:
+; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0
+; GFX89: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX10WGP: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX10CU: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10WGP-NEXT: buffer_gl0_inv
+; GFX10CU-NOT: buffer_gl0_inv
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel workgroup_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_seq_cst(
i32* %in, i32* %out) {
entry:
@@ -599,11 +931,18 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_unordered:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel wavefront_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_unordered(
i32* %in, i32* %out) {
entry:
@@ -613,11 +952,18 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel wavefront_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_monotonic(
i32* %in, i32* %out) {
entry:
@@ -627,11 +973,18 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_acquire:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel wavefront_acquire
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_acquire(
i32* %in, i32* %out) {
entry:
@@ -641,11 +994,18 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GFX89-NOT: buffer_wbinvl1_vol
+; GFX10-NOT: buffer_gl{{[01]}}_inv
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+; GFX10: .amdhsa_kernel wavefront_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst(
i32* %in, i32* %out) {
entry:
Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll?rev=360087&r1=360086&r2=360087&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll Mon May 6 14:57:02 2019
@@ -2,12 +2,19 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s
declare i32 @llvm.amdgcn.workitem.id.x()
; GCN-LABEL: {{^}}system_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel system_one_as_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_unordered(
i32 %in, i32* %out) {
entry:
@@ -17,7 +24,12 @@ entry:
; GCN-LABEL: {{^}}system_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel system_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_monotonic(
i32 %in, i32* %out) {
entry:
@@ -27,7 +39,12 @@ entry:
; GCN-LABEL: {{^}}system_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel system_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_release(
i32 %in, i32* %out) {
entry:
@@ -37,7 +54,12 @@ entry:
; GCN-LABEL: {{^}}system_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel system_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
@@ -47,7 +69,12 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel singlethread_one_as_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_unordered(
i32 %in, i32* %out) {
entry:
@@ -57,7 +84,12 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel singlethread_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_monotonic(
i32 %in, i32* %out) {
entry:
@@ -67,7 +99,12 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel singlethread_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_release(
i32 %in, i32* %out) {
entry:
@@ -77,7 +114,12 @@ entry:
; GCN-LABEL: {{^}}singlethread_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
@@ -87,7 +129,12 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel agent_one_as_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_unordered(
i32 %in, i32* %out) {
entry:
@@ -97,7 +144,12 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel agent_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_monotonic(
i32 %in, i32* %out) {
entry:
@@ -107,7 +159,12 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_release:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel agent_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_release(
i32 %in, i32* %out) {
entry:
@@ -117,7 +174,12 @@ entry:
; GCN-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: s_waitcnt vmcnt(0){{$}}
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel agent_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
@@ -127,7 +189,12 @@ entry:
; GCN-LABEL: {{^}}workgroup_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel workgroup_one_as_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_unordered(
i32 %in, i32* %out) {
entry:
@@ -137,7 +204,12 @@ entry:
; GCN-LABEL: {{^}}workgroup_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel workgroup_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_monotonic(
i32 %in, i32* %out) {
entry:
@@ -145,9 +217,17 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_release:
-; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GCN-LABEL: {{^}}workgroup_one_as_release:
+; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel workgroup_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_release(
i32 %in, i32* %out) {
entry:
@@ -155,9 +235,17 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
-; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
-; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GCN-LABEL: {{^}}workgroup_one_as_seq_cst:
+; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
@@ -167,7 +255,12 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_unordered:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel wavefront_one_as_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_unordered(
i32 %in, i32* %out) {
entry:
@@ -177,7 +270,12 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_monotonic:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel wavefront_one_as_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_monotonic(
i32 %in, i32* %out) {
entry:
@@ -187,7 +285,12 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_release:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel wavefront_one_as_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_release(
i32 %in, i32* %out) {
entry:
@@ -197,7 +300,12 @@ entry:
; GCN-LABEL: {{^}}wavefront_one_as_seq_cst:
; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_one_as_seq_cst(
i32 %in, i32* %out) {
entry:
@@ -207,6 +315,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_private_0:
; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
+; GFX10: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_private_0
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_private_0(
i32* %in, i32 addrspace(5)* %out) {
entry:
@@ -217,6 +330,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_private_1:
; GFX89: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
+; GFX10: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_private_1
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_private_1(
i32* %in, i32 addrspace(5)* %out) {
entry:
@@ -230,6 +348,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_global_0:
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
+; GFX10: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_global_0
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_global_0(
i32* %in, i32 addrspace(1)* %out) {
entry:
@@ -241,6 +364,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_global_1:
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
+; GFX10: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_global_1
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_global_1(
i32* %in, i32 addrspace(1)* %out) {
entry:
@@ -253,6 +381,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_local_0:
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel nontemporal_local_0
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_local_0(
i32* %in, i32 addrspace(3)* %out) {
entry:
@@ -263,6 +395,10 @@ entry:
; GCN-LABEL: {{^}}nontemporal_local_1:
; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel nontemporal_local_1
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_local_1(
i32* %in, i32 addrspace(3)* %out) {
entry:
@@ -275,6 +411,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_flat_0:
; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
+; GFX10: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_flat_0
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_flat_0(
i32* %in, i32* %out) {
entry:
@@ -285,6 +426,11 @@ entry:
; GCN-LABEL: {{^}}nontemporal_flat_1:
; GFX89: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
+; GFX10: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} slc{{$}}
+; GFX10: .amdhsa_kernel nontemporal_flat_1
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @nontemporal_flat_1(
i32* %in, i32* %out) {
entry:
@@ -296,8 +442,13 @@ entry:
}
; GCN-LABEL: {{^}}system_unordered:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel system_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_unordered(
i32 %in, i32* %out) {
entry:
@@ -306,8 +457,13 @@ entry:
}
; GCN-LABEL: {{^}}system_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel system_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_monotonic(
i32 %in, i32* %out) {
entry:
@@ -316,8 +472,14 @@ entry:
}
; GCN-LABEL: {{^}}system_release:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel system_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_release(
i32 %in, i32* %out) {
entry:
@@ -326,8 +488,14 @@ entry:
}
; GCN-LABEL: {{^}}system_seq_cst:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel system_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @system_seq_cst(
i32 %in, i32* %out) {
entry:
@@ -336,8 +504,13 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_unordered:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel singlethread_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_unordered(
i32 %in, i32* %out) {
entry:
@@ -346,8 +519,13 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel singlethread_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_monotonic(
i32 %in, i32* %out) {
entry:
@@ -356,8 +534,13 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_release:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel singlethread_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_release(
i32 %in, i32* %out) {
entry:
@@ -366,8 +549,13 @@ entry:
}
; GCN-LABEL: {{^}}singlethread_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel singlethread_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @singlethread_seq_cst(
i32 %in, i32* %out) {
entry:
@@ -376,8 +564,13 @@ entry:
}
; GCN-LABEL: {{^}}agent_unordered:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel agent_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_unordered(
i32 %in, i32* %out) {
entry:
@@ -386,8 +579,13 @@ entry:
}
; GCN-LABEL: {{^}}agent_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel agent_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_monotonic(
i32 %in, i32* %out) {
entry:
@@ -396,8 +594,14 @@ entry:
}
; GCN-LABEL: {{^}}agent_release:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel agent_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_release(
i32 %in, i32* %out) {
entry:
@@ -406,8 +610,14 @@ entry:
}
; GCN-LABEL: {{^}}agent_seq_cst:
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX89: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10: s_waitcnt lgkmcnt(0){{$}}
+; GFX10: s_waitcnt_vscnt null, 0x0{{$}}
; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel agent_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @agent_seq_cst(
i32 %in, i32* %out) {
entry:
@@ -416,8 +626,13 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_unordered:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel workgroup_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_unordered(
i32 %in, i32* %out) {
entry:
@@ -426,8 +641,13 @@ entry:
}
; GCN-LABEL: {{^}}workgroup_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel workgroup_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_monotonic(
i32 %in, i32* %out) {
entry:
@@ -435,9 +655,17 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_release:
-; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GCN-LABEL: {{^}}workgroup_release:
+; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel workgroup_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_release(
i32 %in, i32* %out) {
entry:
@@ -445,9 +673,17 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}workgroup_seq_cst:
-; GFX89-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GCN-LABEL: {{^}}workgroup_seq_cst:
+; GFX89-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}}
+; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel workgroup_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @workgroup_seq_cst(
i32 %in, i32* %out) {
entry:
@@ -456,8 +692,13 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_unordered:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel wavefront_unordered
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_unordered(
i32 %in, i32* %out) {
entry:
@@ -466,8 +707,13 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_monotonic:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel wavefront_monotonic
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_monotonic(
i32 %in, i32* %out) {
entry:
@@ -476,8 +722,13 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_release:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel wavefront_release
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_release(
i32 %in, i32* %out) {
entry:
@@ -486,8 +737,13 @@ entry:
}
; GCN-LABEL: {{^}}wavefront_seq_cst:
-; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GFX10-NOT: s_waitcnt_v{{[ms]}}cnt {{[^,]+, (0x)*0$}}
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX10: .amdhsa_kernel wavefront_seq_cst
+; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
+; GFX10CU: .amdhsa_workgroup_processor_mode 0
+; GFX10-NOT: .amdhsa_memory_ordered 0
define amdgpu_kernel void @wavefront_seq_cst(
i32 %in, i32* %out) {
entry:
More information about the llvm-commits mailing list