[llvm] bed9be9 - [AMDGPU][gfx1250] Implement SIMemoryLegalizer (#154726)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 01:18:17 PDT 2025
Author: Pierre van Houtryve
Date: 2025-09-10T10:18:11+02:00
New Revision: bed9be954d5a8e4166629e489052c96e8cb24f99
URL: https://github.com/llvm/llvm-project/commit/bed9be954d5a8e4166629e489052c96e8cb24f99
DIFF: https://github.com/llvm/llvm-project/commit/bed9be954d5a8e4166629e489052c96e8cb24f99.diff
LOG: [AMDGPU][gfx1250] Implement SIMemoryLegalizer (#154726)
Implements the base of the MemoryLegalizer for a roughly correct GFX1250 memory model.
Documentation will come later, and some remaining changes still have to be added, but this is the backbone of the model.
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/lib/Target/AMDGPU/SOPInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll
llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll
llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-agent.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-singlethread.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-system.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-wavefront.ll
llvm/test/CodeGen/AMDGPU/memory-legalizer-private-workgroup.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4475c8d1d1602..556ec683f2ec6 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1835,6 +1835,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasScratchBaseForwardingHazard() const {
return GFX1250Insts && getGeneration() == GFX12;
}
+
+ /// \returns true if the subtarget requires a wait for xcnt before atomic
+ /// flat/global stores & rmw.
+ bool requiresWaitXCntBeforeAtomicStores() const { return GFX1250Insts; }
};
class GCNUserSGPRUsageInfo {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index c964d02ee2b97..f7dde2b90b68e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1055,6 +1055,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return AMDGPU::S_WAIT_DSCNT;
case AMDGPU::S_WAIT_KMCNT_soft:
return AMDGPU::S_WAIT_KMCNT;
+ case AMDGPU::S_WAIT_XCNT_soft:
+ return AMDGPU::S_WAIT_XCNT;
default:
return Opcode;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 6ab4eb4bde97c..c20fcacb8fb26 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -606,7 +606,11 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;
public:
- SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {}
+ SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {
+ // GFX12.0 and GFX12.5 memory models greatly overlap, and in some cases
+ // the behavior is the same if assuming GFX12.0 in CU mode.
+ assert(!ST.hasGFX1250Insts() || ST.isCuModeEnabled());
+ }
bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
@@ -2198,7 +2202,8 @@ bool SIGfx10CacheControl::insertBarrierStart(
// mode. This is because a CU mode release fence does not emit any wait, which
// is fine when only dealing with vmem, but isn't sufficient in the presence
// of barriers which do not go through vmem.
- if (!ST.isCuModeEnabled())
+ // GFX12.5 does not require this additional wait.
+ if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts())
return false;
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
@@ -2378,12 +2383,16 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
STORECnt |= true;
break;
case SIAtomicScope::WORKGROUP:
- // In WGP mode the waves of a work-group can be executing on either CU of
- // the WGP. Therefore need to wait for operations to complete to ensure
- // they are visible to waves in the other CU as the L0 is per CU.
- // Otherwise in CU mode and all waves of a work-group are on the same CU
- // which shares the same L0.
- if (!ST.isCuModeEnabled()) {
+ // GFX12.0:
+ // In WGP mode the waves of a work-group can be executing on either CU
+ // of the WGP. Therefore need to wait for operations to complete to
+ // ensure they are visible to waves in the other CU as the L0 is per CU.
+ // Otherwise in CU mode and all waves of a work-group are on the same CU
+ // which shares the same L0.
+ //
+ // GFX12.5:
+ // TODO DOCS
+ if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) {
if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
LOADCnt |= true;
if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
@@ -2435,7 +2444,7 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
//
// This also applies to fences. Fences cannot pair with an instruction
// tracked with bvh/samplecnt as we don't have any atomics that do that.
- if (Order != AtomicOrdering::Acquire) {
+ if (Order != AtomicOrdering::Acquire && ST.hasImageInsts()) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
}
@@ -2487,10 +2496,14 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
ScopeImm = AMDGPU::CPol::SCOPE_DEV;
break;
case SIAtomicScope::WORKGROUP:
- // In WGP mode the waves of a work-group can be executing on either CU of
- // the WGP. Therefore we need to invalidate the L0 which is per CU.
- // Otherwise in CU mode all waves of a work-group are on the same CU, and so
- // the L0 does not need to be invalidated.
+ // GFX12.0:
+ // In WGP mode the waves of a work-group can be executing on either CU of
+ // the WGP. Therefore we need to invalidate the L0 which is per CU.
+ // Otherwise in CU mode all waves of a work-group are on the same CU, and
+ // so the L0 does not need to be invalidated.
+ //
+ // GFX12.5:
+ // TODO DOCS
if (ST.isCuModeEnabled())
return false;
@@ -2535,7 +2548,8 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
if (Pos == Position::AFTER)
++MI;
- // global_wb is only necessary at system scope for gfx120x targets.
+ // global_wb is only necessary at system scope for GFX12.0;
+ // it is also necessary at device scope for GFX12.5.
//
// Emitting it for lower scopes is a slow no-op, so we omit it
// for performance.
@@ -2545,6 +2559,12 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
.addImm(AMDGPU::CPol::SCOPE_SYS);
break;
case SIAtomicScope::AGENT:
+ // TODO DOCS
+ if (ST.hasGFX1250Insts()) {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
+ .addImm(AMDGPU::CPol::SCOPE_DEV);
+ }
+ break;
case SIAtomicScope::WORKGROUP:
// No WB necessary, but we still have to wait.
break;
@@ -2607,17 +2627,32 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
}
bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
- MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
- if (!CPol)
- return false;
+ assert(MI.mayStore() && "Not a Store inst");
+ const bool IsRMW = (MI.mayLoad() && MI.mayStore());
+ bool Changed = false;
+
+ // GFX12.5 only: an xcnt wait is needed before flat and global atomic
+ // stores/RMWs.
+ if (Atomic && ST.requiresWaitXCntBeforeAtomicStores() && TII->isFLAT(MI)) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(S_WAIT_XCNT_soft)).addImm(0);
+ Changed = true;
+ }
+
+ // Remaining fixes do not apply to RMWs.
+ if (IsRMW)
+ return Changed;
+ MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
+ if (!CPol) // Some vmem operations do not have a scope and are not concerned.
+ return Changed;
const unsigned Scope = CPol->getImm() & CPol::SCOPE;
// GFX12.0 only: Extra waits needed before system scope stores.
if (!ST.hasGFX1250Insts()) {
if (!Atomic && Scope == CPol::SCOPE_SYS)
return insertWaitsBeforeSystemScopeStore(MI);
- return false;
+ return Changed;
}
// GFX12.5 only: Require SCOPE_SE on stores that may hit the scratch address
@@ -2627,7 +2662,7 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
(!ST.hasCUStores() || TII->mayAccessScratchThroughFlat(MI)))
return setScope(MI, CPol::SCOPE_SE);
- return false;
+ return Changed;
}
bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &MI) const {
@@ -2839,6 +2874,7 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
assert(MI->mayLoad() && MI->mayStore());
bool Changed = false;
+ MachineInstr &RMWMI = *MI;
if (MOI.isAtomic()) {
const AtomicOrdering Order = MOI.getOrdering();
@@ -2873,6 +2909,7 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
Position::AFTER);
}
+ Changed |= CC->finalizeStore(RMWMI, /*Atomic=*/true);
return Changed;
}
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index dbe0b8c496fed..12a27db241c4e 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1653,6 +1653,11 @@ let OtherPredicates = [HasImageInsts] in {
def S_WAIT_KMCNT_soft : SOPP_Pseudo <"s_soft_wait_kmcnt", (ins s16imm:$simm16), "$simm16">;
}
+
+let SubtargetPredicate = HasWaitXcnt in {
+ def S_WAIT_XCNT_soft : SOPP_Pseudo<"", (ins s16imm:$simm16), "$simm16">;
+}
+
// Represents the point at which a wave must wait for all outstanding direct loads to LDS.
// Typically inserted by the memory legalizer and consumed by SIInsertWaitcnts.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
index 481a2540eacb7..e886ea4fc6ac6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
@@ -1501,6 +1501,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1571,6 +1572,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1645,6 +1649,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1715,6 +1720,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1792,6 +1800,7 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
@@ -1902,6 +1911,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_mul_f64_e32 v[0:1], 4.0, v[0:1]
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1947,6 +1959,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -1987,6 +2000,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2031,6 +2047,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -2107,6 +2124,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2190,6 +2208,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2418,6 +2439,7 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll b/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll
index 5fc9f4a0f8038..4bb2a13d02cc7 100644
--- a/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomics-system-scope.ll
@@ -364,6 +364,7 @@ define i16 @global_one_as_atomic_min_i16(ptr addrspace(1) %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@@ -406,6 +407,7 @@ define i16 @global_one_as_atomic_umin_i16(ptr addrspace(1) %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@@ -448,6 +450,7 @@ define i16 @global_one_as_atomic_max_i16(ptr addrspace(1) %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@@ -490,6 +493,7 @@ define i16 @global_one_as_atomic_umax_i16(ptr addrspace(1) %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v5, v[0:1], v[6:7], off th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@@ -1344,6 +1348,7 @@ define i16 @flat_one_as_atomic_min_i16(ptr %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v5, v[0:1], v[6:7] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@@ -1386,6 +1391,7 @@ define i16 @flat_one_as_atomic_umin_i16(ptr %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v5, v[0:1], v[6:7] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@@ -1428,6 +1434,7 @@ define i16 @flat_one_as_atomic_max_i16(ptr %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v5, v[0:1], v[6:7] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
@@ -1470,6 +1477,7 @@ define i16 @flat_one_as_atomic_umax_i16(ptr %ptr, i16 %val) {
; GFX1250-NEXT: v_lshlrev_b32_e32 v5, v3, v5
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_and_or_b32 v6, v7, v4, v5
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v5, v[0:1], v[6:7] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7
diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
index 3dedf008c917e..1bf37d512f845 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
@@ -10,6 +10,8 @@
define amdgpu_ps void @flat_xchg_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_xchg_saddr_i32_nortn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -47,6 +49,8 @@ define amdgpu_ps void @flat_xchg_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
define amdgpu_ps void @flat_xchg_saddr_i32_nortn_offset_2047(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_xchg_saddr_i32_nortn_offset_2047:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[2:3] offset:2047 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -85,6 +89,8 @@ define amdgpu_ps void @flat_xchg_saddr_i32_nortn_offset_2047(ptr inreg %sbase, i
define amdgpu_ps void @flat_xchg_saddr_i32_nortn_offset_neg2048(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_xchg_saddr_i32_nortn_offset_neg2048:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[2:3] offset:-2048 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -128,6 +134,8 @@ define amdgpu_ps void @flat_xchg_saddr_i32_nortn_offset_neg2048(ptr inreg %sbase
define amdgpu_ps float @flat_xchg_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_xchg_saddr_i32_rtn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -166,6 +174,8 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset,
define amdgpu_ps float @flat_xchg_saddr_i32_rtn_2048(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_xchg_saddr_i32_rtn_2048:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v0, v1, s[2:3] offset:2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -205,6 +215,8 @@ define amdgpu_ps float @flat_xchg_saddr_i32_rtn_2048(ptr inreg %sbase, i32 %voff
define amdgpu_ps float @flat_xchg_saddr_i32_rtn_neg2048(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_xchg_saddr_i32_rtn_neg2048:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v0, v1, s[2:3] offset:-2048 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -262,6 +274,8 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, i
; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v2
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s1, v3
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_swap_b32 v0, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -276,6 +290,8 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset, i
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v2, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_swap_b32 v0, v[2:3], v1 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -325,6 +341,8 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32 %
; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v2
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s1, v3
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_swap_b32 v0, v0, v1, s[0:1] offset:42 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -339,6 +357,8 @@ define amdgpu_ps float @flat_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32 %
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v2, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_swap_b32 v0, v[2:3], v1 offset:42 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -389,6 +409,8 @@ define amdgpu_ps void @flat_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset,
; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v2
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s1, v3
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -402,6 +424,8 @@ define amdgpu_ps void @flat_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset,
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v2, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_swap_b32 v[2:3], v1 scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -449,6 +473,8 @@ define amdgpu_ps void @flat_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i32
; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v2
; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s1, v3
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] offset:42 scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -462,6 +488,8 @@ define amdgpu_ps void @flat_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i32
; GFX1250-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v2, v0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_swap_b32 v[2:3], v1 offset:42 scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -532,6 +560,8 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB10_5
; GFX1250-SDAG-NEXT: .LBB10_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_swap_b64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -578,6 +608,8 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB10_5
; GFX1250-GISEL-NEXT: .LBB10_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_swap_b64 v[0:1], v3, v[4:5], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -712,6 +744,8 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB11_5
; GFX1250-SDAG-NEXT: .LBB11_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_swap_b64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -761,6 +795,8 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB11_5
; GFX1250-GISEL-NEXT: .LBB11_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_swap_b64 v[0:1], v3, v[4:5], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -896,6 +932,8 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: .LBB12_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB12_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_swap_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -933,6 +971,8 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: .LBB12_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB12_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_swap_b64 v0, v[4:5], s[2:3] scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -1044,6 +1084,8 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-SDAG-NEXT: .LBB13_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB13_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_swap_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -1084,6 +1126,8 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-GISEL-NEXT: .LBB13_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB13_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_swap_b64 v0, v[4:5], s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -1186,6 +1230,8 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
define amdgpu_ps float @flat_add_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_add_saddr_i32_rtn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_add_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -1224,6 +1270,8 @@ define amdgpu_ps float @flat_add_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
define amdgpu_ps float @flat_add_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_add_saddr_i32_rtn_neg128:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_add_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -1269,6 +1317,8 @@ define amdgpu_ps float @flat_add_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
define amdgpu_ps void @flat_add_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_add_saddr_i32_nortn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_add_u32 v0, v1, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -1305,6 +1355,8 @@ define amdgpu_ps void @flat_add_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
define amdgpu_ps void @flat_add_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_add_saddr_i32_nortn_neg128:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_add_u32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -1368,6 +1420,8 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB18_5
; GFX1250-SDAG-NEXT: .LBB18_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_add_u64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -1414,6 +1468,8 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB18_5
; GFX1250-GISEL-NEXT: .LBB18_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_add_u64 v[0:1], v3, v[4:5], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -1552,6 +1608,8 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB19_5
; GFX1250-SDAG-NEXT: .LBB19_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_add_u64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -1601,6 +1659,8 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB19_5
; GFX1250-GISEL-NEXT: .LBB19_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_add_u64 v[0:1], v3, v[4:5], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -1740,6 +1800,8 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: .LBB20_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB20_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_add_u64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -1780,6 +1842,8 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: .LBB20_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB20_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_add_u64 v0, v[4:5], s[2:3] scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -1902,6 +1966,8 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: .LBB21_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB21_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_add_u64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -1945,6 +2011,8 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: .LBB21_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB21_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_add_u64 v0, v[4:5], s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -2058,6 +2126,8 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
define amdgpu_ps float @flat_sub_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_sub_saddr_i32_rtn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_sub_u32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2096,6 +2166,8 @@ define amdgpu_ps float @flat_sub_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
define amdgpu_ps float @flat_sub_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_sub_saddr_i32_rtn_neg128:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_sub_u32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2141,6 +2213,8 @@ define amdgpu_ps float @flat_sub_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
define amdgpu_ps void @flat_sub_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_sub_saddr_i32_nortn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_sub_u32 v0, v1, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2177,6 +2251,8 @@ define amdgpu_ps void @flat_sub_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
define amdgpu_ps void @flat_sub_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_sub_saddr_i32_nortn_neg128:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_sub_u32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2240,6 +2316,8 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB26_5
; GFX1250-SDAG-NEXT: .LBB26_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_sub_u64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -2286,6 +2364,8 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB26_5
; GFX1250-GISEL-NEXT: .LBB26_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_sub_u64 v[0:1], v3, v[4:5], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -2426,6 +2506,8 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB27_5
; GFX1250-SDAG-NEXT: .LBB27_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_sub_u64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -2475,6 +2557,8 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB27_5
; GFX1250-GISEL-NEXT: .LBB27_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_sub_u64 v[0:1], v3, v[4:5], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -2616,6 +2700,8 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: .LBB28_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB28_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_sub_u64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -2656,6 +2742,8 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: .LBB28_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB28_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_sub_u64 v0, v[4:5], s[2:3] scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -2780,6 +2868,8 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: .LBB29_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB29_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_sub_u64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -2823,6 +2913,8 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: .LBB29_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB29_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_sub_u64 v0, v[4:5], s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -2938,6 +3030,8 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
define amdgpu_ps float @flat_and_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_and_saddr_i32_rtn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_and_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2976,6 +3070,8 @@ define amdgpu_ps float @flat_and_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
define amdgpu_ps float @flat_and_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_and_saddr_i32_rtn_neg128:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_and_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -3021,6 +3117,8 @@ define amdgpu_ps float @flat_and_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
define amdgpu_ps void @flat_and_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_and_saddr_i32_nortn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_and_b32 v0, v1, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -3057,6 +3155,8 @@ define amdgpu_ps void @flat_and_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
define amdgpu_ps void @flat_and_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_and_saddr_i32_nortn_neg128:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_and_b32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -3120,6 +3220,8 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB34_5
; GFX1250-SDAG-NEXT: .LBB34_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_and_b64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -3167,6 +3269,8 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB34_5
; GFX1250-GISEL-NEXT: .LBB34_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_and_b64 v[0:1], v3, v[4:5], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -3306,6 +3410,8 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB35_5
; GFX1250-SDAG-NEXT: .LBB35_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_and_b64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -3356,6 +3462,8 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB35_5
; GFX1250-GISEL-NEXT: .LBB35_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_and_b64 v[0:1], v3, v[4:5], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -3496,6 +3604,8 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: .LBB36_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB36_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_and_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -3537,6 +3647,8 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: .LBB36_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB36_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_and_b64 v0, v[4:5], s[2:3] scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -3660,6 +3772,8 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: .LBB37_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB37_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_and_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -3704,6 +3818,8 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: .LBB37_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB37_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_and_b64 v0, v[4:5], s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -3818,6 +3934,8 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
define amdgpu_ps float @flat_or_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_or_saddr_i32_rtn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_or_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -3856,6 +3974,8 @@ define amdgpu_ps float @flat_or_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i3
define amdgpu_ps float @flat_or_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_or_saddr_i32_rtn_neg128:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_or_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -3901,6 +4021,8 @@ define amdgpu_ps float @flat_or_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %voff
define amdgpu_ps void @flat_or_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_or_saddr_i32_nortn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_or_b32 v0, v1, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -3937,6 +4059,8 @@ define amdgpu_ps void @flat_or_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset, i
define amdgpu_ps void @flat_or_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_or_saddr_i32_nortn_neg128:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_or_b32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -4000,6 +4124,8 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB42_5
; GFX1250-SDAG-NEXT: .LBB42_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_or_b64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -4047,6 +4173,8 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB42_5
; GFX1250-GISEL-NEXT: .LBB42_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_or_b64 v[0:1], v3, v[4:5], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -4186,6 +4314,8 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB43_5
; GFX1250-SDAG-NEXT: .LBB43_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_or_b64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -4236,6 +4366,8 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB43_5
; GFX1250-GISEL-NEXT: .LBB43_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_or_b64 v[0:1], v3, v[4:5], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -4376,6 +4508,8 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i
; GFX1250-SDAG-NEXT: .LBB44_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB44_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_or_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -4417,6 +4551,8 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i
; GFX1250-GISEL-NEXT: .LBB44_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB44_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_or_b64 v0, v[4:5], s[2:3] scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -4540,6 +4676,8 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof
; GFX1250-SDAG-NEXT: .LBB45_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB45_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_or_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -4584,6 +4722,8 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof
; GFX1250-GISEL-NEXT: .LBB45_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB45_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_or_b64 v0, v[4:5], s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -4698,6 +4838,8 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof
define amdgpu_ps float @flat_xor_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_xor_saddr_i32_rtn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_xor_b32 v0, v0, v1, s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -4736,6 +4878,8 @@ define amdgpu_ps float @flat_xor_saddr_i32_rtn(ptr inreg %sbase, i32 %voffset, i
define amdgpu_ps float @flat_xor_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_xor_saddr_i32_rtn_neg128:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_xor_b32 v0, v0, v1, s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -4781,6 +4925,8 @@ define amdgpu_ps float @flat_xor_saddr_i32_rtn_neg128(ptr inreg %sbase, i32 %vof
define amdgpu_ps void @flat_xor_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_xor_saddr_i32_nortn:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_xor_b32 v0, v1, s[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -4817,6 +4963,8 @@ define amdgpu_ps void @flat_xor_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
define amdgpu_ps void @flat_xor_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %voffset, i32 %data) {
; GFX1250-LABEL: flat_xor_saddr_i32_nortn_neg128:
; GFX1250: ; %bb.0:
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_xor_b32 v0, v1, s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -4880,6 +5028,8 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB50_5
; GFX1250-SDAG-NEXT: .LBB50_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_xor_b64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -4927,6 +5077,8 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB50_5
; GFX1250-GISEL-NEXT: .LBB50_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_xor_b64 v[0:1], v3, v[4:5], s[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -5066,6 +5218,8 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB51_5
; GFX1250-SDAG-NEXT: .LBB51_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_xor_b64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -5116,6 +5270,8 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB51_5
; GFX1250-GISEL-NEXT: .LBB51_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_xor_b64 v[0:1], v3, v[4:5], s[2:3] offset:-128 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -5256,6 +5412,8 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: .LBB52_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB52_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_xor_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -5297,6 +5455,8 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: .LBB52_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB52_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_xor_b64 v0, v[4:5], s[2:3] scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -5420,6 +5580,8 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: .LBB53_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB53_3: ; %atomicrmw.global
+; GFX1250-SDAG-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_xor_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: global_inv scope:SCOPE_DEV
@@ -5464,6 +5626,8 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: .LBB53_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB53_3: ; %atomicrmw.global
+; GFX1250-GISEL-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_xor_b64 v0, v[4:5], s[2:3] offset:-128 scope:SCOPE_DEV
; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: global_inv scope:SCOPE_DEV
@@ -5650,7 +5814,7 @@ define amdgpu_ps void @flat_max_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-LABEL: flat_max_saddr_i32_nortn:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: flat_atomic_max_i32 v0, v1, s[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
;
; GFX950-SDAG-LABEL: flat_max_saddr_i32_nortn:
@@ -5681,7 +5845,7 @@ define amdgpu_ps void @flat_max_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-LABEL: flat_max_saddr_i32_nortn_neg128:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: flat_atomic_max_i32 v0, v1, s[2:3] offset:-128
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
;
; GFX950-SDAG-LABEL: flat_max_saddr_i32_nortn_neg128:
@@ -5735,20 +5899,17 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB58_4
; GFX1250-SDAG-NEXT: .LBB58_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB58_5
; GFX1250-SDAG-NEXT: .LBB58_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_max_i64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB58_2
; GFX1250-SDAG-NEXT: .LBB58_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_subrev_nc_u32_e32 v0, s1, v4
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
@@ -5782,20 +5943,17 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB58_4
; GFX1250-GISEL-NEXT: .LBB58_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB58_5
; GFX1250-GISEL-NEXT: .LBB58_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_max_i64 v[0:1], v3, v[4:5], s[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr6
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB58_2
; GFX1250-GISEL-NEXT: .LBB58_4: ; %atomicrmw.private
; GFX1250-GISEL-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_subrev_nc_u32_e32 v0, s1, v6
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc_lo
@@ -5923,20 +6081,17 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB59_4
; GFX1250-SDAG-NEXT: .LBB59_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB59_5
; GFX1250-SDAG-NEXT: .LBB59_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_max_i64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB59_2
; GFX1250-SDAG-NEXT: .LBB59_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_subrev_nc_u32_e32 v0, s1, v4
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
@@ -5973,20 +6128,17 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB59_4
; GFX1250-GISEL-NEXT: .LBB59_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB59_5
; GFX1250-GISEL-NEXT: .LBB59_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_max_i64 v[0:1], v3, v[4:5], s[2:3] offset:-128 th:TH_ATOMIC_RETURN
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr6
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB59_2
; GFX1250-GISEL-NEXT: .LBB59_4: ; %atomicrmw.private
; GFX1250-GISEL-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_subrev_nc_u32_e32 v0, s1, v6
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc_lo
@@ -6119,9 +6271,9 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB60_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_max_i64 v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB60_2
@@ -6158,9 +6310,9 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB60_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_max_i64 v0, v[4:5], s[2:3]
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr2
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB60_2
@@ -6279,9 +6431,9 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB61_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_max_i64 v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB61_2
@@ -6321,9 +6473,9 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB61_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_max_i64 v0, v[4:5], s[2:3] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr2
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB61_2
@@ -6504,7 +6656,7 @@ define amdgpu_ps void @flat_min_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-LABEL: flat_min_saddr_i32_nortn:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: flat_atomic_min_i32 v0, v1, s[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
;
; GFX950-SDAG-LABEL: flat_min_saddr_i32_nortn:
@@ -6535,7 +6687,7 @@ define amdgpu_ps void @flat_min_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-LABEL: flat_min_saddr_i32_nortn_neg128:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: flat_atomic_min_i32 v0, v1, s[2:3] offset:-128
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
;
; GFX950-SDAG-LABEL: flat_min_saddr_i32_nortn_neg128:
@@ -6589,20 +6741,17 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB66_4
; GFX1250-SDAG-NEXT: .LBB66_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB66_5
; GFX1250-SDAG-NEXT: .LBB66_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_min_i64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB66_2
; GFX1250-SDAG-NEXT: .LBB66_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_subrev_nc_u32_e32 v0, s1, v4
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
@@ -6636,20 +6785,17 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB66_4
; GFX1250-GISEL-NEXT: .LBB66_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB66_5
; GFX1250-GISEL-NEXT: .LBB66_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_min_i64 v[0:1], v3, v[4:5], s[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr6
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB66_2
; GFX1250-GISEL-NEXT: .LBB66_4: ; %atomicrmw.private
; GFX1250-GISEL-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_subrev_nc_u32_e32 v0, s1, v6
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc_lo
@@ -6777,20 +6923,17 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB67_4
; GFX1250-SDAG-NEXT: .LBB67_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB67_5
; GFX1250-SDAG-NEXT: .LBB67_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_min_i64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB67_2
; GFX1250-SDAG-NEXT: .LBB67_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_subrev_nc_u32_e32 v0, s1, v4
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
@@ -6827,20 +6970,17 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB67_4
; GFX1250-GISEL-NEXT: .LBB67_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB67_5
; GFX1250-GISEL-NEXT: .LBB67_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_min_i64 v[0:1], v3, v[4:5], s[2:3] offset:-128 th:TH_ATOMIC_RETURN
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr6
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB67_2
; GFX1250-GISEL-NEXT: .LBB67_4: ; %atomicrmw.private
; GFX1250-GISEL-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_subrev_nc_u32_e32 v0, s1, v6
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc_lo
@@ -6973,9 +7113,9 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB68_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_min_i64 v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB68_2
@@ -7012,9 +7152,9 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB68_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_min_i64 v0, v[4:5], s[2:3]
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr2
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB68_2
@@ -7133,9 +7273,9 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB69_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_min_i64 v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB69_2
@@ -7175,9 +7315,9 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB69_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_min_i64 v0, v[4:5], s[2:3] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr2
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB69_2
@@ -7358,7 +7498,7 @@ define amdgpu_ps void @flat_umax_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-LABEL: flat_umax_saddr_i32_nortn:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: flat_atomic_max_u32 v0, v1, s[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
;
; GFX950-SDAG-LABEL: flat_umax_saddr_i32_nortn:
@@ -7389,7 +7529,7 @@ define amdgpu_ps void @flat_umax_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-LABEL: flat_umax_saddr_i32_nortn_neg128:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: flat_atomic_max_u32 v0, v1, s[2:3] offset:-128
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
;
; GFX950-SDAG-LABEL: flat_umax_saddr_i32_nortn_neg128:
@@ -7443,20 +7583,17 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB74_4
; GFX1250-SDAG-NEXT: .LBB74_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB74_5
; GFX1250-SDAG-NEXT: .LBB74_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_max_u64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB74_2
; GFX1250-SDAG-NEXT: .LBB74_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_subrev_nc_u32_e32 v0, s1, v4
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
@@ -7490,20 +7627,17 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB74_4
; GFX1250-GISEL-NEXT: .LBB74_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB74_5
; GFX1250-GISEL-NEXT: .LBB74_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_max_u64 v[0:1], v3, v[4:5], s[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr6
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB74_2
; GFX1250-GISEL-NEXT: .LBB74_4: ; %atomicrmw.private
; GFX1250-GISEL-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_subrev_nc_u32_e32 v0, s1, v6
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc_lo
@@ -7631,20 +7765,17 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB75_4
; GFX1250-SDAG-NEXT: .LBB75_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB75_5
; GFX1250-SDAG-NEXT: .LBB75_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_max_u64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB75_2
; GFX1250-SDAG-NEXT: .LBB75_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_subrev_nc_u32_e32 v0, s1, v4
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
@@ -7681,20 +7812,17 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB75_4
; GFX1250-GISEL-NEXT: .LBB75_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB75_5
; GFX1250-GISEL-NEXT: .LBB75_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_max_u64 v[0:1], v3, v[4:5], s[2:3] offset:-128 th:TH_ATOMIC_RETURN
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr6
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB75_2
; GFX1250-GISEL-NEXT: .LBB75_4: ; %atomicrmw.private
; GFX1250-GISEL-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_subrev_nc_u32_e32 v0, s1, v6
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc_lo
@@ -7827,9 +7955,9 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB76_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_max_u64 v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB76_2
@@ -7866,9 +7994,9 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB76_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_max_u64 v0, v[4:5], s[2:3]
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr2
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB76_2
@@ -7987,9 +8115,9 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB77_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_max_u64 v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB77_2
@@ -8029,9 +8157,9 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB77_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_max_u64 v0, v[4:5], s[2:3] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr2
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB77_2
@@ -8212,7 +8340,7 @@ define amdgpu_ps void @flat_umin_saddr_i32_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-LABEL: flat_umin_saddr_i32_nortn:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: flat_atomic_min_u32 v0, v1, s[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
;
; GFX950-SDAG-LABEL: flat_umin_saddr_i32_nortn:
@@ -8243,7 +8371,7 @@ define amdgpu_ps void @flat_umin_saddr_i32_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-LABEL: flat_umin_saddr_i32_nortn_neg128:
; GFX1250: ; %bb.0:
; GFX1250-NEXT: flat_atomic_min_u32 v0, v1, s[2:3] offset:-128
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
;
; GFX950-SDAG-LABEL: flat_umin_saddr_i32_nortn_neg128:
@@ -8297,20 +8425,17 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB82_4
; GFX1250-SDAG-NEXT: .LBB82_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB82_5
; GFX1250-SDAG-NEXT: .LBB82_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_min_u64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB82_2
; GFX1250-SDAG-NEXT: .LBB82_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_subrev_nc_u32_e32 v0, s1, v4
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
@@ -8344,20 +8469,17 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB82_4
; GFX1250-GISEL-NEXT: .LBB82_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB82_5
; GFX1250-GISEL-NEXT: .LBB82_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_min_u64 v[0:1], v3, v[4:5], s[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr6
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB82_2
; GFX1250-GISEL-NEXT: .LBB82_4: ; %atomicrmw.private
; GFX1250-GISEL-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_subrev_nc_u32_e32 v0, s1, v6
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc_lo
@@ -8485,20 +8607,17 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB83_4
; GFX1250-SDAG-NEXT: .LBB83_2: ; %atomicrmw.phi
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_branch .LBB83_5
; GFX1250-SDAG-NEXT: .LBB83_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_min_u64 v[0:1], v[4:5], v[2:3] th:TH_ATOMIC_RETURN
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB83_2
; GFX1250-SDAG-NEXT: .LBB83_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_subrev_nc_u32_e32 v0, s1, v4
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
@@ -8535,20 +8654,17 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-GISEL-NEXT: s_cbranch_execnz .LBB83_4
; GFX1250-GISEL-NEXT: .LBB83_2: ; %atomicrmw.phi
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: s_branch .LBB83_5
; GFX1250-GISEL-NEXT: .LBB83_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_min_u64 v[0:1], v3, v[4:5], s[2:3] offset:-128 th:TH_ATOMIC_RETURN
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr6
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB83_2
; GFX1250-GISEL-NEXT: .LBB83_4: ; %atomicrmw.private
; GFX1250-GISEL-NEXT: s_mov_b32 s1, src_flat_scratch_base_lo
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7]
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_subrev_nc_u32_e32 v0, s1, v6
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc_lo
@@ -8681,9 +8797,9 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB84_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_min_u64 v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB84_2
@@ -8720,9 +8836,9 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB84_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_min_u64 v0, v[4:5], s[2:3]
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr2
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB84_2
@@ -8841,9 +8957,9 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-SDAG-NEXT: s_endpgm
; GFX1250-SDAG-NEXT: .LBB85_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: flat_atomic_min_u64 v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB85_2
@@ -8883,9 +8999,9 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-GISEL-NEXT: s_endpgm
; GFX1250-GISEL-NEXT: .LBB85_3: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: flat_atomic_min_u64 v0, v[4:5], s[2:3] offset:-128
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr2
; GFX1250-GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX1250-GISEL-NEXT: s_cbranch_execz .LBB85_2
@@ -11743,7 +11859,9 @@ define double @flat_atomic_fadd_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: s_cbranch_vccz .LBB110_4
; GFX1250-SDAG-NEXT: ; %bb.2: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: global_atomic_add_f64 v[2:3], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB110_5
; GFX1250-SDAG-NEXT: s_branch .LBB110_6
; GFX1250-SDAG-NEXT: .LBB110_3:
@@ -11757,7 +11875,6 @@ define double @flat_atomic_fadd_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: s_sub_co_i32 s2, s0, s2
; GFX1250-SDAG-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX1250-SDAG-NEXT: s_cselect_b32 s2, s2, -1
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[2:3], off, s2
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_add_f64_e32 v[4:5], v[2:3], v[0:1]
@@ -11768,12 +11885,12 @@ define double @flat_atomic_fadd_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: .LBB110_7: ; %atomicrmw.shared
; GFX1250-SDAG-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s0, -1
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: ds_add_rtn_f64 v[2:3], v2, v[0:1]
; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: .LBB110_8: ; %atomicrmw.end
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
@@ -11799,7 +11916,9 @@ define double @flat_atomic_fadd_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: ; %bb.2: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 0
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: global_atomic_add_f64 v[2:3], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
+; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: .LBB110_3: ; %Flow
; GFX1250-GISEL-NEXT: s_xor_b32 s2, s2, 1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -11811,7 +11930,6 @@ define double @flat_atomic_fadd_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: s_sub_co_i32 s2, s0, s2
; GFX1250-GISEL-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s2, s2, -1
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[2:3], off, s2
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_add_f64_e32 v[4:5], v[2:3], v[0:1]
@@ -11827,12 +11945,12 @@ define double @flat_atomic_fadd_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: ; %bb.7: ; %atomicrmw.shared
; GFX1250-GISEL-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s0, -1
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: ds_add_rtn_f64 v[2:3], v2, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: .LBB110_8: ; %atomicrmw.end
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
;
@@ -11969,7 +12087,9 @@ define void @flat_atomic_fadd_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: ; %bb.4: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-SDAG-NEXT: s_mov_b32 s2, 0
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: .LBB111_5: ; %Flow
; GFX1250-SDAG-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s2
; GFX1250-SDAG-NEXT: s_cbranch_vccnz .LBB111_7
@@ -11991,6 +12111,7 @@ define void @flat_atomic_fadd_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s0, -1
; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: ds_add_f64 v2, v[0:1]
; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
@@ -12015,7 +12136,9 @@ define void @flat_atomic_fadd_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: ; %bb.2: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-GISEL-NEXT: s_mov_b32 s2, 0
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1]
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: .LBB111_3: ; %Flow
; GFX1250-GISEL-NEXT: s_xor_b32 s2, s2, 1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -12044,6 +12167,7 @@ define void @flat_atomic_fadd_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s0, -1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: ds_add_f64 v2, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
; GFX1250-GISEL-NEXT: .LBB111_8: ; %atomicrmw.phi
@@ -12162,20 +12286,19 @@ define double @flat_atomic_fmax_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: s_cbranch_vccz .LBB112_2
; GFX1250-SDAG-NEXT: ; %bb.1: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_max_num_f64 v[2:3], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB112_3
; GFX1250-SDAG-NEXT: s_branch .LBB112_4
; GFX1250-SDAG-NEXT: .LBB112_2:
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX1250-SDAG-NEXT: .LBB112_3: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s2, src_flat_scratch_base_lo
-; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX1250-SDAG-NEXT: s_sub_co_i32 s2, s0, s2
; GFX1250-SDAG-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s2, -1
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[2:3], off, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[2:3], v[2:3]
@@ -12183,7 +12306,7 @@ define double @flat_atomic_fmax_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[4:5], v[0:1]
; GFX1250-SDAG-NEXT: scratch_store_b64 off, v[0:1], s0 scope:SCOPE_SE
; GFX1250-SDAG-NEXT: .LBB112_4: ; %atomicrmw.end
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
@@ -12203,10 +12326,10 @@ define double @flat_atomic_fmax_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: ; %bb.1: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_max_num_f64 v[2:3], v2, v[0:1], s[0:1] offset:80 th:TH_ATOMIC_RETURN
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: .LBB112_2: ; %Flow
-; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_xor_b32 s0, s4, 1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: s_cmp_lg_u32 s0, 0
@@ -12217,7 +12340,6 @@ define double @flat_atomic_fmax_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: s_sub_co_i32 s0, s2, s0
; GFX1250-GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s0, -1
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[2:3], off, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[2:3], v[2:3]
@@ -12225,7 +12347,7 @@ define double @flat_atomic_fmax_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: v_max_num_f64_e32 v[0:1], v[4:5], v[0:1]
; GFX1250-GISEL-NEXT: scratch_store_b64 off, v[0:1], s0 scope:SCOPE_SE
; GFX1250-GISEL-NEXT: .LBB112_4: ; %atomicrmw.end
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
;
@@ -12322,8 +12444,9 @@ define void @flat_atomic_fmax_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
; GFX1250-SDAG-NEXT: .LBB113_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_max_num_f64 v2, v[0:1], s[0:1]
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB113_2
; GFX1250-SDAG-NEXT: .LBB113_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s2, src_flat_scratch_base_lo
@@ -12355,8 +12478,9 @@ define void @flat_atomic_fmax_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: ; %bb.1: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_max_num_f64 v2, v[0:1], s[0:1] offset:80
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: .LBB113_2: ; %Flow
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_xor_b32 s0, s4, 1
@@ -12463,20 +12587,19 @@ define double @flat_atomic_fmin_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: s_cbranch_vccz .LBB114_2
; GFX1250-SDAG-NEXT: ; %bb.1: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_min_num_f64 v[2:3], v2, v[0:1], s[0:1] th:TH_ATOMIC_RETURN
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_cbranch_execz .LBB114_3
; GFX1250-SDAG-NEXT: s_branch .LBB114_4
; GFX1250-SDAG-NEXT: .LBB114_2:
; GFX1250-SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX1250-SDAG-NEXT: .LBB114_3: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s2, src_flat_scratch_base_lo
-; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
; GFX1250-SDAG-NEXT: s_sub_co_i32 s2, s0, s2
; GFX1250-SDAG-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s2, -1
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[2:3], off, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_max_num_f64_e32 v[4:5], v[2:3], v[2:3]
@@ -12484,7 +12607,7 @@ define double @flat_atomic_fmin_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: v_min_num_f64_e32 v[0:1], v[4:5], v[0:1]
; GFX1250-SDAG-NEXT: scratch_store_b64 off, v[0:1], s0 scope:SCOPE_SE
; GFX1250-SDAG-NEXT: .LBB114_4: ; %atomicrmw.end
-; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
;
@@ -12504,10 +12627,10 @@ define double @flat_atomic_fmin_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: ; %bb.1: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_min_num_f64 v[2:3], v2, v[0:1], s[0:1] offset:80 th:TH_ATOMIC_RETURN
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: .LBB114_2: ; %Flow
-; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_xor_b32 s0, s4, 1
; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-GISEL-NEXT: s_cmp_lg_u32 s0, 0
@@ -12518,7 +12641,6 @@ define double @flat_atomic_fmin_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: s_sub_co_i32 s0, s2, s0
; GFX1250-GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s0, -1
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[2:3], off, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_max_num_f64_e32 v[4:5], v[2:3], v[2:3]
@@ -12526,7 +12648,7 @@ define double @flat_atomic_fmin_f64_saddr_rtn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: v_min_num_f64_e32 v[0:1], v[4:5], v[0:1]
; GFX1250-GISEL-NEXT: scratch_store_b64 off, v[0:1], s0 scope:SCOPE_SE
; GFX1250-GISEL-NEXT: .LBB114_4: ; %atomicrmw.end
-; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3
; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
;
@@ -12623,8 +12745,9 @@ define void @flat_atomic_fmin_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
; GFX1250-SDAG-NEXT: .LBB115_3: ; %atomicrmw.global
; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-SDAG-NEXT: s_wait_storecnt 0x0
; GFX1250-SDAG-NEXT: flat_atomic_min_num_f64 v2, v[0:1], s[0:1]
-; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-SDAG-NEXT: s_cbranch_execnz .LBB115_2
; GFX1250-SDAG-NEXT: .LBB115_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: s_mov_b32 s2, src_flat_scratch_base_lo
@@ -12656,8 +12779,9 @@ define void @flat_atomic_fmin_f64_saddr_nortn(ptr inreg %ptr, double %data) {
; GFX1250-GISEL-NEXT: ; %bb.1: ; %atomicrmw.global
; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1250-GISEL-NEXT: s_wait_storecnt 0x0
; GFX1250-GISEL-NEXT: flat_atomic_min_num_f64 v2, v[0:1], s[0:1] offset:80
-; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-GISEL-NEXT: .LBB115_2: ; %Flow
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_xor_b32 s0, s4, 1
@@ -12754,6 +12878,7 @@ define float @flat_atomic_fadd_f32_saddr_rtn(ptr inreg %ptr, float %data) {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, v0, s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -12785,8 +12910,9 @@ define void @flat_atomic_fadd_f32_saddr_nortn(ptr inreg %ptr, float %data) {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_add_f32 v1, v0, s[0:1] offset:40
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX950-SDAG-LABEL: flat_atomic_fadd_f32_saddr_nortn:
@@ -12816,6 +12942,7 @@ define float @flat_atomic_fmax_f32_saddr_rtn(ptr inreg %ptr, float %data) {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_max_num_f32 v0, v1, v0, s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -12878,8 +13005,9 @@ define void @flat_atomic_fmax_f32_saddr_nortn(ptr inreg %ptr, float %data) {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_max_num_f32 v1, v0, s[0:1] offset:40
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX950-SDAG-LABEL: flat_atomic_fmax_f32_saddr_nortn:
@@ -12939,6 +13067,7 @@ define float @flat_atomic_fmin_f32_saddr_rtn(ptr inreg %ptr, float %data) {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_min_num_f32 v0, v1, v0, s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -13001,8 +13130,9 @@ define void @flat_atomic_fmin_f32_saddr_nortn(ptr inreg %ptr, float %data) {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_min_num_f32 v1, v0, s[0:1] offset:40
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX950-SDAG-LABEL: flat_atomic_fmin_f32_saddr_nortn:
@@ -13062,6 +13192,7 @@ define <2 x half> @flat_atomic_fadd_v2f16_saddr_rtn(ptr inreg %ptr, <2 x half> %
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_pk_add_f16 v0, v1, v0, s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -13093,8 +13224,9 @@ define void @flat_atomic_fadd_v2f16_saddr_nortn(ptr inreg %ptr, <2 x half> %data
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_pk_add_f16 v1, v0, s[0:1] offset:40
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX950-SDAG-LABEL: flat_atomic_fadd_v2f16_saddr_nortn:
@@ -13135,11 +13267,13 @@ define <2 x half> @flat_atomic_fmax_v2f16_saddr_rtn(ptr inreg %ptr, <2 x half> %
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_pk_max_num_f16 v0, v5, v5
; GFX1250-NEXT: v_pk_max_num_f16 v4, v0, v1
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v2, v[4:5], s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v5
; GFX1250-NEXT: s_or_b32 s2, vcc_lo, s2
-; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2
; GFX1250-NEXT: s_cbranch_execnz .LBB124_1
; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -13215,10 +13349,11 @@ define void @flat_atomic_fmax_v2f16_saddr_nortn(ptr inreg %ptr, <2 x half> %data
; GFX1250-NEXT: v_pk_max_num_f16 v0, v1, v1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_pk_max_num_f16 v0, v0, v3
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_or_b32 s2, vcc_lo, s2
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -13298,11 +13433,13 @@ define <2 x half> @flat_atomic_fmin_v2f16_saddr_rtn(ptr inreg %ptr, <2 x half> %
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1250-NEXT: v_pk_max_num_f16 v0, v5, v5
; GFX1250-NEXT: v_pk_min_num_f16 v4, v0, v1
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v2, v[4:5], s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v5
; GFX1250-NEXT: s_or_b32 s2, vcc_lo, s2
-; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2
; GFX1250-NEXT: s_cbranch_execnz .LBB126_1
; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -13378,10 +13515,11 @@ define void @flat_atomic_fmin_v2f16_saddr_nortn(ptr inreg %ptr, <2 x half> %data
; GFX1250-NEXT: v_pk_max_num_f16 v0, v1, v1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_pk_min_num_f16 v0, v0, v3
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v2, v[0:1], s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_or_b32 s2, vcc_lo, s2
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -13450,6 +13588,7 @@ define <2 x bfloat> @flat_atomic_fadd_v2bf16_saddr_rtn(ptr inreg %ptr, <2 x bflo
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_pk_add_bf16 v0, v1, v0, s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -13473,8 +13612,9 @@ define void @flat_atomic_fadd_v2bf16_saddr_nortn(ptr inreg %ptr, <2 x bfloat> %d
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_pk_add_bf16 v1, v0, s[0:1] offset:40
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
;
; GFX950-LABEL: flat_atomic_fadd_v2bf16_saddr_nortn:
@@ -13504,11 +13644,13 @@ define <2 x bfloat> @flat_atomic_fmax_v2bf16_saddr_rtn(ptr inreg %ptr, <2 x bflo
; GFX1250-NEXT: v_mov_b32_e32 v5, v1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_pk_max_num_bf16 v4, v5, v0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v2, v[4:5], s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v5
; GFX1250-NEXT: s_or_b32 s2, vcc_lo, s2
-; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2
; GFX1250-NEXT: s_cbranch_execnz .LBB130_1
; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -13560,10 +13702,11 @@ define void @flat_atomic_fmax_v2bf16_saddr_nortn(ptr inreg %ptr, <2 x bfloat> %d
; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_pk_max_num_bf16 v2, v3, v0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v2, v1, v[2:3], s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v3, v2
; GFX1250-NEXT: s_or_b32 s2, vcc_lo, s2
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -13618,11 +13761,13 @@ define <2 x bfloat> @flat_atomic_fmin_v2bf16_saddr_rtn(ptr inreg %ptr, <2 x bflo
; GFX1250-NEXT: v_mov_b32_e32 v5, v1
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-NEXT: v_pk_min_num_bf16 v4, v5, v0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v2, v[4:5], s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v5
; GFX1250-NEXT: s_or_b32 s2, vcc_lo, s2
-; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1250-NEXT: s_and_not1_b32 exec_lo, exec_lo, s2
; GFX1250-NEXT: s_cbranch_execnz .LBB132_1
; GFX1250-NEXT: ; %bb.2: ; %atomicrmw.end
@@ -13674,10 +13819,11 @@ define void @flat_atomic_fmin_v2bf16_saddr_nortn(ptr inreg %ptr, <2 x bfloat> %d
; GFX1250-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_pk_min_num_bf16 v2, v3, v0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v2, v1, v[2:3], s[0:1] offset:40 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3
-; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v3, v2
; GFX1250-NEXT: s_or_b32 s2, vcc_lo, s2
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll
index 1e293c28ce397..ba761bedb905c 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx942.ll
@@ -38,6 +38,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -79,6 +80,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4.0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -189,6 +191,7 @@ define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half>
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_pk_add_rtn_f16 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -255,6 +258,7 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16>
; GFX1250: ; %bb.0:
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_pk_add_rtn_bf16 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
index 3856f0c327495..160b35352d8a4 100644
--- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
@@ -1473,6 +1473,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1513,6 +1514,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1557,6 +1561,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1597,6 +1602,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1673,6 +1681,7 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, doubl
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
@@ -1765,6 +1774,9 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1809,6 +1821,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -1849,6 +1862,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -1893,6 +1909,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -1969,6 +1986,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -2063,6 +2081,9 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1250-NEXT: v_mov_b64_e32 v[0:1], 4.0
; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_atomic_add_f64 v2, v[0:1], s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2136,6 +2157,7 @@ define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) {
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -2275,6 +2297,7 @@ define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b64_e32 v[2:3], 4.0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -2307,6 +2330,7 @@ define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, doub
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -2339,6 +2363,7 @@ define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll
index e3ec4d1f0f67a..614a221d43d53 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-agent.ll
@@ -130,6 +130,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -144,6 +145,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -158,6 +160,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -220,6 +223,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -234,6 +238,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -248,6 +253,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -385,6 +391,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -399,6 +406,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -413,6 +421,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -475,6 +484,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -489,6 +499,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -503,6 +514,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll
index e86f0e0083805..2b04ab5ab8a00 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cooperative.atomic-workgroup.ll
@@ -124,6 +124,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -137,6 +138,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -150,6 +152,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -163,6 +166,7 @@ define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef r
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -176,6 +180,7 @@ define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr nou
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -189,6 +194,7 @@ define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr nou
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -202,6 +208,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -215,6 +222,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -228,6 +236,7 @@ define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -358,6 +367,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -371,6 +381,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -384,6 +395,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -397,6 +409,7 @@ define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr no
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -410,6 +423,7 @@ define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -423,6 +437,7 @@ define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -436,6 +451,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -449,6 +465,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -462,6 +479,7 @@ define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll
index e921f581c00a7..516c3946f63dc 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-barriers.ll
@@ -45,7 +45,6 @@ define amdgpu_kernel void @test_s_barrier() {
;
; GFX1250-LABEL: test_s_barrier:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_alu 0xffe3
; GFX1250-NEXT: s_barrier_signal -1
; GFX1250-NEXT: s_barrier_wait -1
; GFX1250-NEXT: s_endpgm
@@ -103,8 +102,8 @@ define amdgpu_kernel void @test_s_barrier_workgroup_fence() {
;
; GFX1250-LABEL: test_s_barrier_workgroup_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_dscnt 0x0
-; GFX1250-NEXT: s_wait_alu 0xffe3
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_barrier_signal -1
; GFX1250-NEXT: s_barrier_wait -1
; GFX1250-NEXT: s_endpgm
@@ -168,11 +167,9 @@ define amdgpu_kernel void @test_s_barrier_agent_fence() {
;
; GFX1250-LABEL: test_s_barrier_agent_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_alu 0xffe3
; GFX1250-NEXT: s_barrier_signal -1
; GFX1250-NEXT: s_barrier_wait -1
; GFX1250-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
index 97d52d5f1f26d..6a76f4307dcad 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
@@ -82,6 +82,8 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
;
; GFX1250-LABEL: workgroup_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
@@ -153,6 +155,8 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
@@ -229,6 +233,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
@@ -305,6 +311,8 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
@@ -379,6 +387,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
;
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
@@ -450,6 +460,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
;
; GFX1250-LABEL: workgroup_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
@@ -526,6 +538,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
@@ -602,6 +616,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
@@ -787,8 +803,7 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -893,8 +908,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -1000,8 +1014,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -1190,8 +1203,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
;
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -1296,8 +1308,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -1403,8 +1414,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -1600,8 +1610,6 @@ define amdgpu_kernel void @system_release_fence() {
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -1713,8 +1721,6 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -1827,8 +1833,6 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -2024,8 +2028,6 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2137,8 +2139,6 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -2251,8 +2251,6 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
index b3f6533d43887..736a8b58466dd 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
@@ -1066,7 +1066,8 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
;
; GFX1250-LABEL: workgroup_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire
@@ -1146,7 +1147,8 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release
@@ -1231,7 +1233,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel
@@ -1316,7 +1319,8 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst
@@ -1391,6 +1395,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
;
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire
@@ -1462,6 +1468,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
;
; GFX1250-LABEL: workgroup_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release
@@ -1538,6 +1546,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel
@@ -1614,6 +1624,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst
@@ -1799,8 +1811,7 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -1905,8 +1916,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2012,8 +2022,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2202,8 +2211,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
;
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2308,8 +2316,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
;
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2415,8 +2422,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
;
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2612,8 +2618,6 @@ define amdgpu_kernel void @system_release_fence() {
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2725,8 +2729,6 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -2839,8 +2841,6 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -3036,8 +3036,6 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -3149,8 +3147,6 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -3263,8 +3259,6 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
index 36adbc0011118..55ec0c2255f9b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
@@ -830,14 +830,10 @@ define amdgpu_kernel void @flat_agent_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -1000,6 +996,7 @@ define amdgpu_kernel void @flat_agent_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1159,6 +1156,7 @@ define amdgpu_kernel void @flat_agent_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1342,9 +1340,9 @@ define amdgpu_kernel void @flat_agent_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -1529,9 +1527,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -1692,6 +1690,7 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -1882,6 +1881,7 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2067,9 +2067,9 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -2285,9 +2285,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2505,9 +2505,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2729,6 +2729,7 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2979,13 +2980,11 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -3235,13 +3234,11 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -3496,6 +3493,7 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3779,6 +3777,7 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -4057,9 +4056,9 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -4368,9 +4367,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -4681,9 +4680,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -4970,6 +4969,7 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -5255,6 +5255,7 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -5564,9 +5565,9 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -5877,9 +5878,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6190,9 +6191,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6503,9 +6504,9 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6816,9 +6817,9 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7129,9 +7130,9 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7442,9 +7443,9 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7755,9 +7756,9 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -8057,6 +8058,7 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8372,6 +8374,7 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -8697,9 +8700,9 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -9044,13 +9047,11 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9394,13 +9395,11 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9720,9 +9719,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10038,6 +10036,7 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -10382,13 +10381,11 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10732,13 +10729,11 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -11082,13 +11077,11 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -11432,13 +11425,11 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -11778,9 +11769,9 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -12126,13 +12117,11 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -12476,13 +12465,11 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -12826,13 +12813,11 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -13684,14 +13669,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -13855,6 +13836,7 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14014,6 +13996,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14197,10 +14180,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14384,10 +14367,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14547,6 +14530,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -14733,6 +14717,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -14918,10 +14903,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -15132,10 +15117,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -15348,10 +15333,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -15582,6 +15567,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -15843,13 +15829,11 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -16110,13 +16094,11 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -16372,6 +16354,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16651,6 +16634,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -16929,10 +16913,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -17236,10 +17220,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -17545,10 +17529,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -17830,6 +17814,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18111,6 +18096,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18416,10 +18402,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18725,10 +18711,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19034,10 +19020,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19343,10 +19329,10 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19652,10 +19638,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19961,10 +19947,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -20270,10 +20256,10 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -20579,10 +20565,10 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -20881,6 +20867,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21206,6 +21193,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -21532,10 +21520,10 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21889,13 +21877,11 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -22250,13 +22236,11 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -22587,9 +22571,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -22916,6 +22899,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -23271,13 +23255,11 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -23632,13 +23614,11 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -23993,13 +23973,11 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -24354,13 +24332,11 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -24711,10 +24687,10 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -25070,13 +25046,11 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -25431,13 +25405,11 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -25792,13 +25764,11 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
index 5526b29037977..faa970e049bd2 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
@@ -114,8 +114,6 @@ define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
index 964f1c8957f6f..721ecd8da5387 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
@@ -1329,8 +1329,6 @@ define amdgpu_kernel void @flat_nontemporal_volatile_load(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
index 871c941dd6dca..635895259ee32 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
@@ -936,6 +936,7 @@ define amdgpu_kernel void @flat_singlethread_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1095,6 +1096,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1254,6 +1256,7 @@ define amdgpu_kernel void @flat_singlethread_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1413,6 +1416,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1572,6 +1576,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -1731,6 +1736,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -1890,6 +1896,7 @@ define amdgpu_kernel void @flat_singlethread_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -2049,6 +2056,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -2208,6 +2216,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -2411,6 +2420,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -2617,6 +2627,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -2823,6 +2834,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -3077,6 +3089,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3329,6 +3342,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3581,6 +3595,7 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3833,6 +3848,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -4085,6 +4101,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -4337,6 +4354,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -4589,6 +4607,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -4841,6 +4860,7 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -5093,6 +5113,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -5345,6 +5366,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -5597,6 +5619,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -5849,6 +5872,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -6101,6 +6125,7 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -6353,6 +6378,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -6605,6 +6631,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -6901,6 +6928,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -7201,6 +7229,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -7501,6 +7530,7 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -7801,6 +7831,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8101,6 +8132,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8401,6 +8433,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8701,6 +8734,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9001,6 +9035,7 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9301,6 +9336,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9601,6 +9637,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9901,6 +9938,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10201,6 +10239,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10501,6 +10540,7 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10801,6 +10841,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -11101,6 +11142,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -12037,6 +12079,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12196,6 +12239,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12355,6 +12399,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12514,6 +12559,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12673,6 +12719,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -12832,6 +12879,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -12991,6 +13039,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -13150,6 +13199,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -13309,6 +13359,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -13512,6 +13563,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -13718,6 +13770,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -13924,6 +13977,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -14178,6 +14232,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -14430,6 +14485,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -14682,6 +14738,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -14934,6 +14991,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -15186,6 +15244,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -15438,6 +15497,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -15690,6 +15750,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -15942,6 +16003,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16194,6 +16256,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16446,6 +16509,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16698,6 +16762,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16950,6 +17015,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -17202,6 +17268,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -17454,6 +17521,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -17706,6 +17774,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -18002,6 +18071,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -18302,6 +18372,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -18602,6 +18673,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -18902,6 +18974,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -19202,6 +19275,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -19502,6 +19576,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -19802,6 +19877,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -20102,6 +20178,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -20402,6 +20479,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -20702,6 +20780,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21002,6 +21081,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21302,6 +21382,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21602,6 +21683,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21902,6 +21984,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -22202,6 +22285,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
index 9d70a2437e553..e45a8e51c836c 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
@@ -834,14 +834,10 @@ define amdgpu_kernel void @flat_system_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -1004,6 +1000,7 @@ define amdgpu_kernel void @flat_system_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1163,6 +1160,7 @@ define amdgpu_kernel void @flat_system_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1351,9 +1349,8 @@ define amdgpu_kernel void @flat_system_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -1543,9 +1540,8 @@ define amdgpu_kernel void @flat_system_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -1706,6 +1702,7 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -1898,6 +1895,7 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -2088,9 +2086,8 @@ define amdgpu_kernel void @flat_system_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -2313,9 +2310,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2540,9 +2536,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2766,6 +2761,7 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -3023,13 +3019,10 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -3286,13 +3279,10 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -3547,6 +3537,7 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3832,6 +3823,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -4115,9 +4107,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -4433,9 +4424,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -4753,9 +4743,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -5044,6 +5033,7 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -5331,6 +5321,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -5647,9 +5638,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -5967,9 +5957,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6287,9 +6276,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6607,9 +6595,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6927,9 +6914,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7247,9 +7233,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7567,9 +7552,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7887,9 +7871,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -8189,6 +8172,7 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8506,6 +8490,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -8836,9 +8821,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -9190,13 +9174,10 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9547,13 +9528,10 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9875,9 +9853,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10195,6 +10172,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -10546,13 +10524,10 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10903,13 +10878,10 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -11260,13 +11232,10 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -11617,13 +11586,10 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -11970,9 +11936,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -12325,13 +12290,10 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -12682,13 +12644,10 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -13039,13 +12998,10 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -13901,14 +13857,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -14072,6 +14024,7 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14231,6 +14184,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14419,10 +14373,9 @@ define amdgpu_kernel void @flat_system_one_as_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14611,10 +14564,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -14774,6 +14726,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -14962,6 +14915,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15152,10 +15106,9 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -15373,10 +15326,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15596,10 +15548,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15832,6 +15783,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -16100,13 +16052,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -16374,13 +16323,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -16636,6 +16582,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16917,6 +16864,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -17200,10 +17148,9 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -17514,10 +17461,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -17830,10 +17776,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18117,6 +18062,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18400,6 +18346,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18712,10 +18659,9 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -19028,10 +18974,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -19344,10 +19289,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -19660,10 +19604,9 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -19976,10 +19919,9 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -20292,10 +20234,9 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -20608,10 +20549,9 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -20924,10 +20864,9 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -21226,6 +21165,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21553,6 +21493,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -21884,10 +21825,9 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -22248,13 +22188,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -22616,13 +22553,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -22955,9 +22889,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -23286,6 +23219,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -23648,13 +23582,10 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -24016,13 +23947,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -24384,13 +24312,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -24752,13 +24677,10 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -25116,10 +25038,9 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -25482,13 +25403,10 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -25850,13 +25768,10 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -26218,13 +26133,10 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
index 77f52e4d4b9fd..41c5927cad4de 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
@@ -152,8 +152,6 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
@@ -433,8 +431,6 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
@@ -1151,7 +1147,9 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
index f086542b3d1f8..041b3f51abc2f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
@@ -936,6 +936,7 @@ define amdgpu_kernel void @flat_wavefront_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1095,6 +1096,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1254,6 +1256,7 @@ define amdgpu_kernel void @flat_wavefront_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1413,6 +1416,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1572,6 +1576,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -1731,6 +1736,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -1890,6 +1896,7 @@ define amdgpu_kernel void @flat_wavefront_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -2049,6 +2056,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -2208,6 +2216,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -2411,6 +2420,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -2617,6 +2627,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -2823,6 +2834,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -3077,6 +3089,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3329,6 +3342,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3581,6 +3595,7 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3833,6 +3848,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -4085,6 +4101,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -4337,6 +4354,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -4589,6 +4607,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -4841,6 +4860,7 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -5093,6 +5113,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -5345,6 +5366,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -5597,6 +5619,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -5849,6 +5872,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -6101,6 +6125,7 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -6353,6 +6378,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -6605,6 +6631,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -6901,6 +6928,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -7201,6 +7229,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -7501,6 +7530,7 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -7801,6 +7831,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8101,6 +8132,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8401,6 +8433,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8701,6 +8734,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9001,6 +9035,7 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9301,6 +9336,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9601,6 +9637,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9901,6 +9938,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10201,6 +10239,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10501,6 +10540,7 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10801,6 +10841,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -11101,6 +11142,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -12037,6 +12079,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12196,6 +12239,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12355,6 +12399,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12514,6 +12559,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12673,6 +12719,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -12832,6 +12879,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -12991,6 +13039,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -13150,6 +13199,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -13309,6 +13359,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -13512,6 +13563,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -13718,6 +13770,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -13924,6 +13977,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -14178,6 +14232,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -14430,6 +14485,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -14682,6 +14738,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -14934,6 +14991,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -15186,6 +15244,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -15438,6 +15497,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -15690,6 +15750,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -15942,6 +16003,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16194,6 +16256,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16446,6 +16509,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16698,6 +16762,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -16950,6 +17015,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -17202,6 +17268,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -17454,6 +17521,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -17706,6 +17774,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -18002,6 +18071,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -18302,6 +18372,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -18602,6 +18673,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -18902,6 +18974,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -19202,6 +19275,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -19502,6 +19576,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -19802,6 +19877,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -20102,6 +20178,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -20402,6 +20479,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -20702,6 +20780,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21002,6 +21081,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21302,6 +21382,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21602,6 +21683,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21902,6 +21984,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
index d8e6ad043e061..85ecab8128d2f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
@@ -816,7 +816,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -980,6 +981,7 @@ define amdgpu_kernel void @flat_workgroup_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1139,6 +1141,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1315,7 +1318,9 @@ define amdgpu_kernel void @flat_workgroup_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1492,7 +1497,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -1652,6 +1659,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -1830,8 +1838,9 @@ define amdgpu_kernel void @flat_workgroup_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -2007,7 +2016,9 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -2203,9 +2214,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -2400,9 +2413,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -2617,6 +2632,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -2854,7 +2870,9 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -3092,7 +3110,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -3347,6 +3367,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -3618,8 +3639,9 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -3888,7 +3910,9 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -4177,9 +4201,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -4467,9 +4493,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -4740,8 +4768,9 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -5012,8 +5041,9 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -5301,9 +5331,11 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -5591,9 +5623,11 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -5881,9 +5915,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -6171,9 +6207,11 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -6469,6 +6507,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -6781,6 +6820,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -7098,7 +7138,9 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -7430,7 +7472,9 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -7762,7 +7806,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8077,6 +8123,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8389,6 +8436,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -8720,7 +8768,9 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9052,7 +9102,9 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9384,7 +9436,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -9716,7 +9770,9 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10046,7 +10102,9 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10378,7 +10436,9 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -10710,7 +10770,9 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -11042,7 +11104,9 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -11844,6 +11908,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -12007,6 +12073,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12166,6 +12233,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12335,6 +12403,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12504,6 +12575,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
@@ -12663,6 +12737,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -12832,7 +12907,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -13001,6 +13078,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
@@ -13180,7 +13260,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -13359,7 +13443,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in) {
entry:
@@ -13570,6 +13658,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -13796,6 +13885,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -14022,6 +14114,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -14276,6 +14371,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -14538,7 +14634,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -14800,6 +14898,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
@@ -15072,7 +15173,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -15344,7 +15449,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -15606,7 +15715,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -15868,7 +15979,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -16140,7 +16253,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -16412,7 +16529,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -16684,7 +16805,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -16956,7 +17081,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -17228,7 +17357,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -17500,7 +17633,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -17772,7 +17909,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -18044,7 +18185,11 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %out, i32 %in, i32 %old) {
entry:
@@ -18340,6 +18485,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -18648,6 +18794,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -18958,6 +19105,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -19278,6 +19428,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -19598,6 +19751,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -19908,6 +20064,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -20216,6 +20373,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -20536,6 +20694,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -20856,6 +21017,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21176,6 +21340,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21496,6 +21663,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -21814,6 +21984,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -22134,6 +22307,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -22454,6 +22630,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
@@ -22774,6 +22953,9 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
index 184e15406bfbc..5c2d8eb4f5ec0 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
@@ -834,14 +834,10 @@ define amdgpu_kernel void @global_agent_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -1011,6 +1007,7 @@ define amdgpu_kernel void @global_agent_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1177,6 +1174,7 @@ define amdgpu_kernel void @global_agent_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1368,9 +1366,9 @@ define amdgpu_kernel void @global_agent_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -1563,9 +1561,9 @@ define amdgpu_kernel void @global_agent_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -1731,6 +1729,7 @@ define amdgpu_kernel void @global_agent_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -1924,6 +1923,7 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2115,9 +2115,9 @@ define amdgpu_kernel void @global_agent_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -2337,9 +2337,9 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -2561,9 +2561,9 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -2775,6 +2775,7 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -3016,13 +3017,11 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -3263,13 +3262,11 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -3505,6 +3502,7 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -3767,6 +3765,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -4027,9 +4026,9 @@ define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -4318,9 +4317,9 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -4611,9 +4610,9 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -4879,6 +4878,7 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -5143,6 +5143,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -5432,9 +5433,9 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -5725,9 +5726,9 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -6018,9 +6019,9 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -6311,9 +6312,9 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -6604,9 +6605,9 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -6897,9 +6898,9 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -7190,9 +7191,9 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -7483,9 +7484,9 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -7752,6 +7753,7 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8035,6 +8037,7 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -8328,9 +8331,9 @@ define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
@@ -8644,13 +8647,11 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8963,13 +8964,11 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9257,9 +9256,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9543,6 +9541,7 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -9856,13 +9855,11 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10175,13 +10172,11 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10494,13 +10489,11 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10813,13 +10806,11 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -11128,9 +11119,9 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
@@ -11445,13 +11436,11 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -11764,13 +11753,11 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -12083,13 +12070,11 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -12923,14 +12908,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -13100,6 +13081,7 @@ define amdgpu_kernel void @global_agent_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -13266,6 +13248,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -13457,10 +13440,10 @@ define amdgpu_kernel void @global_agent_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -13652,10 +13635,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -13820,6 +13803,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -14013,6 +13997,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -14204,10 +14189,10 @@ define amdgpu_kernel void @global_agent_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -14426,10 +14411,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -14650,10 +14635,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -14864,6 +14849,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -15105,13 +15091,11 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -15352,13 +15336,11 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -15594,6 +15576,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15856,6 +15839,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -16116,10 +16100,10 @@ define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -16407,10 +16391,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -16700,10 +16684,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -16968,6 +16952,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -17232,6 +17217,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -17521,10 +17507,10 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -17814,10 +17800,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18107,10 +18093,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18400,10 +18386,10 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18693,10 +18679,10 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18986,10 +18972,10 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19279,10 +19265,10 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19572,10 +19558,10 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19841,6 +19827,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20124,6 +20111,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -20437,13 +20425,11 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20756,13 +20742,11 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -21050,9 +21034,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -21336,6 +21319,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -21649,13 +21633,11 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -21968,13 +21950,11 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -22287,13 +22267,11 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -22606,13 +22584,11 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -22921,10 +22897,10 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -23238,13 +23214,11 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -23557,13 +23531,11 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -23876,13 +23848,11 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
index ed2d62356f8f2..ca7802d295e0b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
@@ -94,8 +94,6 @@ define amdgpu_kernel void @global_last_use_and_volatile_load(ptr addrspace(1) %i
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
index c1bfe21865c15..d74c230488ea2 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
@@ -1112,8 +1112,6 @@ define amdgpu_kernel void @global_nontemporal_volatile_load(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
index 6a5a6e01c741b..e7f7b1d196be7 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
@@ -952,6 +952,7 @@ define amdgpu_kernel void @global_singlethread_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1118,6 +1119,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1284,6 +1286,7 @@ define amdgpu_kernel void @global_singlethread_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1450,6 +1453,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1614,6 +1618,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -1778,6 +1783,7 @@ define amdgpu_kernel void @global_singlethread_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -1942,6 +1948,7 @@ define amdgpu_kernel void @global_singlethread_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -2106,6 +2113,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -2270,6 +2278,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -2462,6 +2471,7 @@ define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -2657,6 +2667,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -2852,6 +2863,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -3087,6 +3099,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -3320,6 +3333,7 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -3553,6 +3567,7 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -3786,6 +3801,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4019,6 +4035,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4252,6 +4269,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4485,6 +4503,7 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4718,6 +4737,7 @@ define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4951,6 +4971,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -5184,6 +5205,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -5417,6 +5439,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -5650,6 +5673,7 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -5883,6 +5907,7 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -6116,6 +6141,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -6349,6 +6375,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -6612,6 +6639,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -6879,6 +6907,7 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7146,6 +7175,7 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7413,6 +7443,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7680,6 +7711,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7947,6 +7979,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8214,6 +8247,7 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8481,6 +8515,7 @@ define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8748,6 +8783,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9015,6 +9051,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9282,6 +9319,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9549,6 +9587,7 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9816,6 +9855,7 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10083,6 +10123,7 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10350,6 +10391,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -11301,6 +11343,7 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -11467,6 +11510,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -11633,6 +11677,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -11799,6 +11844,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -11963,6 +12009,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12127,6 +12174,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12291,6 +12339,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12455,6 +12504,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12619,6 +12669,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12811,6 +12862,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -13006,6 +13058,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -13201,6 +13254,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -13436,6 +13490,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -13669,6 +13724,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -13902,6 +13958,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -14135,6 +14192,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -14368,6 +14426,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -14601,6 +14660,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -14834,6 +14894,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15067,6 +15128,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15300,6 +15362,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15533,6 +15596,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15766,6 +15830,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15999,6 +16064,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -16232,6 +16298,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -16465,6 +16532,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -16698,6 +16766,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -16961,6 +17030,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_ret_cm
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -17228,6 +17298,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -17495,6 +17566,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -17762,6 +17834,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18029,6 +18102,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18296,6 +18370,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18563,6 +18638,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18830,6 +18906,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19097,6 +19174,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19364,6 +19442,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19631,6 +19710,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19898,6 +19978,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20165,6 +20246,7 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20432,6 +20514,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20699,6 +20782,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
index 7ddd515830e11..e7880a81800fd 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
@@ -838,14 +838,10 @@ define amdgpu_kernel void @global_system_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -1015,6 +1011,7 @@ define amdgpu_kernel void @global_system_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1181,6 +1178,7 @@ define amdgpu_kernel void @global_system_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1377,9 +1375,8 @@ define amdgpu_kernel void @global_system_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -1577,9 +1574,8 @@ define amdgpu_kernel void @global_system_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -1745,6 +1741,7 @@ define amdgpu_kernel void @global_system_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -1940,6 +1937,7 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -2136,9 +2134,8 @@ define amdgpu_kernel void @global_system_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -2365,9 +2362,8 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -2596,9 +2592,8 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -2812,6 +2807,7 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -3060,13 +3056,10 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -3314,13 +3307,10 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -3556,6 +3546,7 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -3820,6 +3811,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -4085,9 +4077,8 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -4383,9 +4374,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -4683,9 +4673,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -4953,6 +4942,7 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -5219,6 +5209,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -5515,9 +5506,8 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -5815,9 +5805,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -6115,9 +6104,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -6415,9 +6403,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
@@ -6684,6 +6671,7 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -6969,6 +6957,7 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -7289,13 +7278,10 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7615,13 +7601,10 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7911,9 +7894,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8199,6 +8181,7 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -8519,13 +8502,10 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8845,13 +8825,10 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9171,13 +9148,10 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9497,13 +9471,10 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9819,9 +9790,8 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
@@ -10143,13 +10113,10 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10469,13 +10436,10 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10795,13 +10759,10 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -11639,14 +11600,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -11816,6 +11773,7 @@ define amdgpu_kernel void @global_system_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -11982,6 +11940,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -12178,10 +12137,9 @@ define amdgpu_kernel void @global_system_one_as_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -12378,10 +12336,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -12546,6 +12503,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12741,6 +12699,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -12937,10 +12896,9 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -13166,10 +13124,9 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -13397,10 +13354,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -13613,6 +13569,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -13861,13 +13818,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -14115,13 +14069,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -14357,6 +14308,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -14621,6 +14573,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -14886,10 +14839,9 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15184,10 +15136,9 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15484,10 +15435,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15754,6 +15704,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -16020,6 +15971,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -16316,10 +16268,9 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -16616,10 +16567,9 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -16916,10 +16866,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -17216,10 +17165,9 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -17516,10 +17464,9 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -17816,10 +17763,9 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18116,10 +18062,9 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18416,10 +18361,9 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18685,6 +18629,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18970,6 +18915,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -19268,10 +19214,9 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19591,13 +19536,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19917,13 +19859,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20213,9 +20152,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20501,6 +20439,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -20821,13 +20760,10 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -21147,13 +21083,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -21473,13 +21406,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -21799,13 +21729,10 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -22121,10 +22048,9 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -22445,13 +22371,10 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -22771,13 +22694,10 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -23097,13 +23017,10 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
index 1539fb574c0bd..3bf5ed8b2397f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
@@ -155,8 +155,6 @@ define amdgpu_kernel void @global_volatile_load_0(
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
@@ -368,8 +366,6 @@ define amdgpu_kernel void @global_volatile_load_1(
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
@@ -1041,7 +1037,9 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
index 1aa8305b1a837..09eb062d876f6 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
@@ -952,6 +952,7 @@ define amdgpu_kernel void @global_wavefront_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1118,6 +1119,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1284,6 +1286,7 @@ define amdgpu_kernel void @global_wavefront_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1450,6 +1453,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1614,6 +1618,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -1778,6 +1783,7 @@ define amdgpu_kernel void @global_wavefront_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -1942,6 +1948,7 @@ define amdgpu_kernel void @global_wavefront_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -2106,6 +2113,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -2270,6 +2278,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -2462,6 +2471,7 @@ define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -2657,6 +2667,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -2852,6 +2863,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -3087,6 +3099,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -3320,6 +3333,7 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -3553,6 +3567,7 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -3786,6 +3801,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4019,6 +4035,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4252,6 +4269,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4485,6 +4503,7 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4718,6 +4737,7 @@ define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4951,6 +4971,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -5184,6 +5205,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -5417,6 +5439,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -5650,6 +5673,7 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -5883,6 +5907,7 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -6116,6 +6141,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -6349,6 +6375,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -6612,6 +6639,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -6879,6 +6907,7 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7146,6 +7175,7 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7413,6 +7443,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7680,6 +7711,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7947,6 +7979,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8214,6 +8247,7 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8481,6 +8515,7 @@ define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8748,6 +8783,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9015,6 +9051,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9282,6 +9319,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9549,6 +9587,7 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9816,6 +9855,7 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10083,6 +10123,7 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10350,6 +10391,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -11301,6 +11343,7 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -11467,6 +11510,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -11633,6 +11677,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -11799,6 +11844,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -11963,6 +12009,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12127,6 +12174,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12291,6 +12339,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12455,6 +12504,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12619,6 +12669,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -12811,6 +12862,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -13006,6 +13058,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -13201,6 +13254,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -13436,6 +13490,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -13669,6 +13724,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -13902,6 +13958,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -14135,6 +14192,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -14368,6 +14426,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -14601,6 +14660,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -14834,6 +14894,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15067,6 +15128,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15300,6 +15362,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15533,6 +15596,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15766,6 +15830,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15999,6 +16064,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -16232,6 +16298,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -16465,6 +16532,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -16698,6 +16766,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -16961,6 +17030,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -17228,6 +17298,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -17495,6 +17566,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -17762,6 +17834,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18029,6 +18102,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18296,6 +18370,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18563,6 +18638,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18830,6 +18906,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19097,6 +19174,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19364,6 +19442,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19631,6 +19710,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19898,6 +19978,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20165,6 +20246,7 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20432,6 +20514,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20699,6 +20782,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
index 3eab16e6b9713..885edec03c2b6 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
@@ -804,7 +804,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
@@ -975,6 +976,7 @@ define amdgpu_kernel void @global_workgroup_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1141,6 +1143,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1325,7 +1328,9 @@ define amdgpu_kernel void @global_workgroup_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1510,7 +1515,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -1675,6 +1682,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -1849,7 +1857,9 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@@ -2031,7 +2041,9 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -2224,8 +2236,11 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@@ -2417,8 +2432,11 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@@ -2615,6 +2633,7 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -2835,7 +2854,9 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -3056,7 +3077,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -3292,6 +3315,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -3535,7 +3559,9 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -3786,7 +3812,9 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -4048,8 +4076,11 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -4310,8 +4341,11 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -4554,7 +4588,9 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -4797,7 +4833,9 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -5058,8 +5096,11 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -5320,8 +5361,11 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -5582,8 +5626,11 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -5844,8 +5891,11 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -6106,8 +6156,11 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -6368,8 +6421,11 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -6630,8 +6686,11 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -6892,8 +6951,11 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -7156,6 +7218,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7428,6 +7491,7 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -7713,7 +7777,9 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8006,7 +8072,9 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8299,7 +8367,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8574,6 +8644,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -8846,6 +8917,7 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9138,7 +9210,9 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9431,7 +9505,9 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -9724,7 +9800,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10017,7 +10095,9 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10308,7 +10388,9 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10601,7 +10683,9 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -10894,7 +10978,9 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -11187,7 +11273,9 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -11987,6 +12075,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
@@ -12157,6 +12247,7 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -12323,6 +12414,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -12499,6 +12591,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -12675,6 +12770,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
@@ -12839,6 +12937,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -13013,7 +13112,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@@ -13187,6 +13288,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
@@ -13371,7 +13475,11 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@@ -13555,7 +13663,11 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in) {
entry:
@@ -13752,6 +13864,7 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -13964,6 +14077,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -14176,6 +14292,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -14411,6 +14530,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -14654,7 +14774,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -14897,6 +15019,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
@@ -15150,7 +15275,11 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -15403,7 +15532,11 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -15646,7 +15779,9 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -15889,7 +16024,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -16142,7 +16279,11 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -16395,7 +16536,11 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -16648,7 +16793,11 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -16901,7 +17050,11 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -17154,7 +17307,11 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -17407,7 +17564,11 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -17660,7 +17821,11 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -17913,7 +18078,11 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:16
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
@@ -18176,6 +18345,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18448,6 +18618,7 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -18725,6 +18896,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19009,6 +19183,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19293,6 +19470,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19567,6 +19747,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -19839,6 +20020,7 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20123,6 +20305,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20407,6 +20592,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20691,6 +20879,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -20975,6 +21166,9 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -21257,6 +21451,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -21541,6 +21738,9 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -21825,6 +22025,9 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -22109,6 +22312,9 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:16 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
index 102616b9a2065..986b48b60a443 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
@@ -762,7 +762,8 @@ define amdgpu_kernel void @local_agent_seq_cst_load(
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_load_b32 v1, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -1235,7 +1236,8 @@ define amdgpu_kernel void @local_agent_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@@ -1404,7 +1406,8 @@ define amdgpu_kernel void @local_agent_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@@ -1890,7 +1893,8 @@ define amdgpu_kernel void @local_agent_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
@@ -2075,7 +2079,8 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2261,7 +2266,8 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2679,7 +2685,8 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -2899,7 +2906,8 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -3475,7 +3483,8 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
@@ -3689,7 +3698,8 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -3904,7 +3914,8 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -4511,7 +4522,8 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -4726,7 +4738,8 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -4941,7 +4954,8 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5156,7 +5170,8 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5371,7 +5386,8 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5586,7 +5602,8 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5801,7 +5818,8 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -6016,7 +6034,8 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -6714,7 +6733,8 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -6964,7 +6984,8 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -7214,7 +7235,8 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -7926,7 +7948,8 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8176,7 +8199,8 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8426,7 +8450,8 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8676,7 +8701,8 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8926,7 +8952,8 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -9176,7 +9203,8 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -9426,7 +9454,8 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -9676,7 +9705,8 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
index 1356fe4854170..81bbe0a78203e 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
@@ -762,7 +762,8 @@ define amdgpu_kernel void @local_system_seq_cst_load(
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_load_b32 v1, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -1235,7 +1236,8 @@ define amdgpu_kernel void @local_system_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@@ -1404,7 +1406,8 @@ define amdgpu_kernel void @local_system_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@@ -1890,7 +1893,8 @@ define amdgpu_kernel void @local_system_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
@@ -2075,7 +2079,8 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2261,7 +2266,8 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2679,7 +2685,8 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -2899,7 +2906,8 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -3475,7 +3483,8 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
@@ -3689,7 +3698,8 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -3904,7 +3914,8 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -4511,7 +4522,8 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -4726,7 +4738,8 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -4941,7 +4954,8 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5156,7 +5170,8 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5371,7 +5386,8 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5586,7 +5602,8 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5801,7 +5818,8 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -6016,7 +6034,8 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -6714,7 +6733,8 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -6964,7 +6984,8 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -7214,7 +7235,8 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -7926,7 +7948,8 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8176,7 +8199,8 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8426,7 +8450,8 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8676,7 +8701,8 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8926,7 +8952,8 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -9176,7 +9203,8 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -9426,7 +9454,8 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -9676,7 +9705,8 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
index 75e28f9008e28..980141a87ecf3 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
@@ -890,7 +890,8 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
index 6aaf9d323b1fd..6a233a2c9013b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
@@ -762,7 +762,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load(
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_load_b32 v1, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -1235,7 +1236,8 @@ define amdgpu_kernel void @local_workgroup_release_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@@ -1404,7 +1406,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
@@ -1890,7 +1893,8 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in) {
@@ -2075,7 +2079,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2261,7 +2266,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v0, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -2679,7 +2685,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -2899,7 +2906,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_storexchg_rtn_b32 v1, v0, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -3475,7 +3483,8 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %out, i32 %in, i32 %old) {
@@ -3689,7 +3698,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -3904,7 +3914,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -4511,7 +4522,8 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -4726,7 +4738,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -4941,7 +4954,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5156,7 +5170,8 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5371,7 +5386,8 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5586,7 +5602,8 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -5801,7 +5818,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -6016,7 +6034,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
; GFX1250-NEXT: v_mov_b32_e32 v1, s1
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_b32 v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
@@ -6714,7 +6733,8 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -6964,7 +6984,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -7214,7 +7235,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -7926,7 +7948,8 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8176,7 +8199,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8426,7 +8450,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8676,7 +8701,8 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -8926,7 +8952,8 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -9176,7 +9203,8 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -9426,7 +9454,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
@@ -9676,7 +9705,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_cmpstore_rtn_b32 v1, v0, v1, v2 offset:16
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-agent.ll
index 4ca0cc92e09be..8ac3414da7354 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-agent.ll
@@ -804,13 +804,9 @@ define amdgpu_kernel void @private_agent_seq_cst_load(
; GFX1250-NEXT: v_cndmask_b32_e64 v0, s1, v0, s2
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -992,6 +988,7 @@ define amdgpu_kernel void @private_agent_unordered_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1170,6 +1167,7 @@ define amdgpu_kernel void @private_agent_monotonic_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1348,9 +1346,9 @@ define amdgpu_kernel void @private_agent_release_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -1530,9 +1528,9 @@ define amdgpu_kernel void @private_agent_seq_cst_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -1712,6 +1710,7 @@ define amdgpu_kernel void @private_agent_monotonic_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -1890,6 +1889,7 @@ define amdgpu_kernel void @private_agent_acquire_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2070,9 +2070,9 @@ define amdgpu_kernel void @private_agent_release_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -2252,9 +2252,9 @@ define amdgpu_kernel void @private_agent_acq_rel_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2436,9 +2436,9 @@ define amdgpu_kernel void @private_agent_seq_cst_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2674,6 +2674,7 @@ define amdgpu_kernel void @private_agent_acquire_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -2910,13 +2911,11 @@ define amdgpu_kernel void @private_agent_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -3152,13 +3151,11 @@ define amdgpu_kernel void @private_agent_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -3430,6 +3427,7 @@ define amdgpu_kernel void @private_agent_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -3699,6 +3697,7 @@ define amdgpu_kernel void @private_agent_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -3970,9 +3969,9 @@ define amdgpu_kernel void @private_agent_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
@@ -4243,9 +4242,9 @@ define amdgpu_kernel void @private_agent_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -4518,9 +4517,9 @@ define amdgpu_kernel void @private_agent_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -4793,6 +4792,7 @@ define amdgpu_kernel void @private_agent_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -5064,6 +5064,7 @@ define amdgpu_kernel void @private_agent_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -5335,9 +5336,9 @@ define amdgpu_kernel void @private_agent_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -5610,9 +5611,9 @@ define amdgpu_kernel void @private_agent_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -5885,9 +5886,9 @@ define amdgpu_kernel void @private_agent_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6160,9 +6161,9 @@ define amdgpu_kernel void @private_agent_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6435,9 +6436,9 @@ define amdgpu_kernel void @private_agent_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6710,9 +6711,9 @@ define amdgpu_kernel void @private_agent_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6985,9 +6986,9 @@ define amdgpu_kernel void @private_agent_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7260,9 +7261,9 @@ define amdgpu_kernel void @private_agent_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -7563,6 +7564,7 @@ define amdgpu_kernel void @private_agent_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -7864,6 +7866,7 @@ define amdgpu_kernel void @private_agent_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -8166,9 +8169,9 @@ define amdgpu_kernel void @private_agent_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -8471,13 +8474,11 @@ define amdgpu_kernel void @private_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8779,13 +8780,11 @@ define amdgpu_kernel void @private_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9087,9 +9086,8 @@ define amdgpu_kernel void @private_agent_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9391,6 +9389,7 @@ define amdgpu_kernel void @private_agent_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -9693,13 +9692,11 @@ define amdgpu_kernel void @private_agent_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10001,13 +9998,11 @@ define amdgpu_kernel void @private_agent_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10309,13 +10304,11 @@ define amdgpu_kernel void @private_agent_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10617,13 +10610,11 @@ define amdgpu_kernel void @private_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10925,9 +10916,9 @@ define amdgpu_kernel void @private_agent_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -11231,13 +11222,11 @@ define amdgpu_kernel void @private_agent_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11539,13 +11528,11 @@ define amdgpu_kernel void @private_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11847,13 +11834,11 @@ define amdgpu_kernel void @private_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -12658,13 +12643,9 @@ define amdgpu_kernel void @private_agent_one_as_seq_cst_load(
; GFX1250-NEXT: v_cndmask_b32_e64 v0, s1, v0, s2
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -12847,6 +12828,7 @@ define amdgpu_kernel void @private_agent_one_as_unordered_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13025,6 +13007,7 @@ define amdgpu_kernel void @private_agent_one_as_monotonic_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13203,10 +13186,10 @@ define amdgpu_kernel void @private_agent_one_as_release_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13385,10 +13368,10 @@ define amdgpu_kernel void @private_agent_one_as_seq_cst_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13567,6 +13550,7 @@ define amdgpu_kernel void @private_agent_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -13745,6 +13729,7 @@ define amdgpu_kernel void @private_agent_one_as_acquire_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -13925,10 +13910,10 @@ define amdgpu_kernel void @private_agent_one_as_release_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -14107,10 +14092,10 @@ define amdgpu_kernel void @private_agent_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -14291,10 +14276,10 @@ define amdgpu_kernel void @private_agent_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -14529,6 +14514,7 @@ define amdgpu_kernel void @private_agent_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -14766,13 +14752,11 @@ define amdgpu_kernel void @private_agent_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -15009,13 +14993,11 @@ define amdgpu_kernel void @private_agent_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -15288,6 +15270,7 @@ define amdgpu_kernel void @private_agent_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -15557,6 +15540,7 @@ define amdgpu_kernel void @private_agent_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -15828,10 +15812,10 @@ define amdgpu_kernel void @private_agent_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -16101,10 +16085,10 @@ define amdgpu_kernel void @private_agent_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -16376,10 +16360,10 @@ define amdgpu_kernel void @private_agent_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -16651,6 +16635,7 @@ define amdgpu_kernel void @private_agent_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -16922,6 +16907,7 @@ define amdgpu_kernel void @private_agent_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -17193,10 +17179,10 @@ define amdgpu_kernel void @private_agent_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -17468,10 +17454,10 @@ define amdgpu_kernel void @private_agent_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -17743,10 +17729,10 @@ define amdgpu_kernel void @private_agent_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18018,10 +18004,10 @@ define amdgpu_kernel void @private_agent_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18293,10 +18279,10 @@ define amdgpu_kernel void @private_agent_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18568,10 +18554,10 @@ define amdgpu_kernel void @private_agent_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -18843,10 +18829,10 @@ define amdgpu_kernel void @private_agent_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19118,10 +19104,10 @@ define amdgpu_kernel void @private_agent_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -19421,6 +19407,7 @@ define amdgpu_kernel void @private_agent_one_as_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -19722,6 +19709,7 @@ define amdgpu_kernel void @private_agent_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -20025,13 +20013,11 @@ define amdgpu_kernel void @private_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -20334,13 +20320,11 @@ define amdgpu_kernel void @private_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -20643,9 +20627,8 @@ define amdgpu_kernel void @private_agent_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -20948,6 +20931,7 @@ define amdgpu_kernel void @private_agent_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -21251,13 +21235,11 @@ define amdgpu_kernel void @private_agent_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -21560,13 +21542,11 @@ define amdgpu_kernel void @private_agent_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -21869,13 +21849,11 @@ define amdgpu_kernel void @private_agent_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -22178,13 +22156,11 @@ define amdgpu_kernel void @private_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -22487,10 +22463,10 @@ define amdgpu_kernel void @private_agent_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
@@ -22794,13 +22770,11 @@ define amdgpu_kernel void @private_agent_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -23103,13 +23077,11 @@ define amdgpu_kernel void @private_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -23412,13 +23384,11 @@ define amdgpu_kernel void @private_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
+; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-singlethread.ll
index e9ee6b4925a13..f5ba70e454823 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-singlethread.ll
@@ -984,6 +984,7 @@ define amdgpu_kernel void @private_singlethread_unordered_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1162,6 +1163,7 @@ define amdgpu_kernel void @private_singlethread_monotonic_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1340,6 +1342,7 @@ define amdgpu_kernel void @private_singlethread_release_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1518,6 +1521,7 @@ define amdgpu_kernel void @private_singlethread_seq_cst_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1696,6 +1700,7 @@ define amdgpu_kernel void @private_singlethread_monotonic_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -1874,6 +1879,7 @@ define amdgpu_kernel void @private_singlethread_acquire_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -2052,6 +2058,7 @@ define amdgpu_kernel void @private_singlethread_release_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -2230,6 +2237,7 @@ define amdgpu_kernel void @private_singlethread_acq_rel_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -2408,6 +2416,7 @@ define amdgpu_kernel void @private_singlethread_seq_cst_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -2640,6 +2649,7 @@ define amdgpu_kernel void @private_singlethread_acquire_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -2875,6 +2885,7 @@ define amdgpu_kernel void @private_singlethread_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -3110,6 +3121,7 @@ define amdgpu_kernel void @private_singlethread_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -3381,6 +3393,7 @@ define amdgpu_kernel void @private_singlethread_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -3650,6 +3663,7 @@ define amdgpu_kernel void @private_singlethread_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -3919,6 +3933,7 @@ define amdgpu_kernel void @private_singlethread_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -4188,6 +4203,7 @@ define amdgpu_kernel void @private_singlethread_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -4457,6 +4473,7 @@ define amdgpu_kernel void @private_singlethread_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -4726,6 +4743,7 @@ define amdgpu_kernel void @private_singlethread_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -4995,6 +5013,7 @@ define amdgpu_kernel void @private_singlethread_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -5264,6 +5283,7 @@ define amdgpu_kernel void @private_singlethread_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -5533,6 +5553,7 @@ define amdgpu_kernel void @private_singlethread_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -5802,6 +5823,7 @@ define amdgpu_kernel void @private_singlethread_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -6071,6 +6093,7 @@ define amdgpu_kernel void @private_singlethread_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -6340,6 +6363,7 @@ define amdgpu_kernel void @private_singlethread_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -6609,6 +6633,7 @@ define amdgpu_kernel void @private_singlethread_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -6878,6 +6903,7 @@ define amdgpu_kernel void @private_singlethread_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -7147,6 +7173,7 @@ define amdgpu_kernel void @private_singlethread_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -7444,6 +7471,7 @@ define amdgpu_kernel void @private_singlethread_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -7745,6 +7773,7 @@ define amdgpu_kernel void @private_singlethread_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8046,6 +8075,7 @@ define amdgpu_kernel void @private_singlethread_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8347,6 +8377,7 @@ define amdgpu_kernel void @private_singlethread_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8648,6 +8679,7 @@ define amdgpu_kernel void @private_singlethread_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8949,6 +8981,7 @@ define amdgpu_kernel void @private_singlethread_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9250,6 +9283,7 @@ define amdgpu_kernel void @private_singlethread_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9551,6 +9585,7 @@ define amdgpu_kernel void @private_singlethread_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9852,6 +9887,7 @@ define amdgpu_kernel void @private_singlethread_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10153,6 +10189,7 @@ define amdgpu_kernel void @private_singlethread_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10454,6 +10491,7 @@ define amdgpu_kernel void @private_singlethread_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10755,6 +10793,7 @@ define amdgpu_kernel void @private_singlethread_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11056,6 +11095,7 @@ define amdgpu_kernel void @private_singlethread_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11357,6 +11397,7 @@ define amdgpu_kernel void @private_singlethread_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11658,6 +11699,7 @@ define amdgpu_kernel void @private_singlethread_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -12641,6 +12683,7 @@ define amdgpu_kernel void @private_singlethread_one_as_unordered_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -12819,6 +12862,7 @@ define amdgpu_kernel void @private_singlethread_one_as_monotonic_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -12997,6 +13041,7 @@ define amdgpu_kernel void @private_singlethread_one_as_release_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13175,6 +13220,7 @@ define amdgpu_kernel void @private_singlethread_one_as_seq_cst_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13353,6 +13399,7 @@ define amdgpu_kernel void @private_singlethread_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -13531,6 +13578,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acquire_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -13709,6 +13757,7 @@ define amdgpu_kernel void @private_singlethread_one_as_release_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -13887,6 +13936,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -14065,6 +14115,7 @@ define amdgpu_kernel void @private_singlethread_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -14297,6 +14348,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -14532,6 +14584,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -14767,6 +14820,7 @@ define amdgpu_kernel void @private_singlethread_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -15038,6 +15092,7 @@ define amdgpu_kernel void @private_singlethread_one_as_monotonic_monotonic_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -15307,6 +15362,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acquire_monotonic_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -15576,6 +15632,7 @@ define amdgpu_kernel void @private_singlethread_one_as_release_monotonic_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -15845,6 +15902,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acq_rel_monotonic_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -16114,6 +16172,7 @@ define amdgpu_kernel void @private_singlethread_one_as_seq_cst_monotonic_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -16383,6 +16442,7 @@ define amdgpu_kernel void @private_singlethread_one_as_monotonic_acquire_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -16652,6 +16712,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -16921,6 +16982,7 @@ define amdgpu_kernel void @private_singlethread_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -17190,6 +17252,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -17459,6 +17522,7 @@ define amdgpu_kernel void @private_singlethread_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -17728,6 +17792,7 @@ define amdgpu_kernel void @private_singlethread_one_as_monotonic_seq_cst_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -17997,6 +18062,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -18266,6 +18332,7 @@ define amdgpu_kernel void @private_singlethread_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -18535,6 +18602,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -18804,6 +18872,7 @@ define amdgpu_kernel void @private_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -19101,6 +19170,7 @@ define amdgpu_kernel void @private_singlethread_one_as_monotonic_monotonic_ret_c
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -19402,6 +19472,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acquire_monotonic_ret_cmp
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -19703,6 +19774,7 @@ define amdgpu_kernel void @private_singlethread_one_as_release_monotonic_ret_cmp
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20004,6 +20076,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acq_rel_monotonic_ret_cmp
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20305,6 +20378,7 @@ define amdgpu_kernel void @private_singlethread_one_as_seq_cst_monotonic_ret_cmp
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20606,6 +20680,7 @@ define amdgpu_kernel void @private_singlethread_one_as_monotonic_acquire_ret_cmp
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20907,6 +20982,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acquire_acquire_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -21208,6 +21284,7 @@ define amdgpu_kernel void @private_singlethread_one_as_release_acquire_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -21509,6 +21586,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acq_rel_acquire_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -21810,6 +21888,7 @@ define amdgpu_kernel void @private_singlethread_one_as_seq_cst_acquire_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -22111,6 +22190,7 @@ define amdgpu_kernel void @private_singlethread_one_as_monotonic_seq_cst_ret_cmp
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -22412,6 +22492,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acquire_seq_cst_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -22713,6 +22794,7 @@ define amdgpu_kernel void @private_singlethread_one_as_release_seq_cst_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -23014,6 +23096,7 @@ define amdgpu_kernel void @private_singlethread_one_as_acq_rel_seq_cst_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -23315,6 +23398,7 @@ define amdgpu_kernel void @private_singlethread_one_as_seq_cst_seq_cst_ret_cmpxc
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-system.ll
index 24ec3a34c4e6e..1e2153f76bc03 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-system.ll
@@ -804,13 +804,9 @@ define amdgpu_kernel void @private_system_seq_cst_load(
; GFX1250-NEXT: v_cndmask_b32_e64 v0, s1, v0, s2
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -992,6 +988,7 @@ define amdgpu_kernel void @private_system_unordered_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1170,6 +1167,7 @@ define amdgpu_kernel void @private_system_monotonic_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1349,9 +1347,8 @@ define amdgpu_kernel void @private_system_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -1532,9 +1529,8 @@ define amdgpu_kernel void @private_system_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -1714,6 +1710,7 @@ define amdgpu_kernel void @private_system_monotonic_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -1892,6 +1889,7 @@ define amdgpu_kernel void @private_system_acquire_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -2073,9 +2071,8 @@ define amdgpu_kernel void @private_system_release_atomicrmw(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -2256,9 +2253,8 @@ define amdgpu_kernel void @private_system_acq_rel_atomicrmw(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2441,9 +2437,8 @@ define amdgpu_kernel void @private_system_seq_cst_atomicrmw(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -2679,6 +2674,7 @@ define amdgpu_kernel void @private_system_acquire_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -2916,13 +2912,10 @@ define amdgpu_kernel void @private_system_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -3159,13 +3152,10 @@ define amdgpu_kernel void @private_system_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -3437,6 +3427,7 @@ define amdgpu_kernel void @private_system_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -3706,6 +3697,7 @@ define amdgpu_kernel void @private_system_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -3978,9 +3970,8 @@ define amdgpu_kernel void @private_system_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
@@ -4252,9 +4243,8 @@ define amdgpu_kernel void @private_system_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -4528,9 +4518,8 @@ define amdgpu_kernel void @private_system_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -4803,6 +4792,7 @@ define amdgpu_kernel void @private_system_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -5074,6 +5064,7 @@ define amdgpu_kernel void @private_system_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -5346,9 +5337,8 @@ define amdgpu_kernel void @private_system_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -5622,9 +5612,8 @@ define amdgpu_kernel void @private_system_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -5898,9 +5887,8 @@ define amdgpu_kernel void @private_system_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6174,9 +6162,8 @@ define amdgpu_kernel void @private_system_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
@@ -6477,6 +6464,7 @@ define amdgpu_kernel void @private_system_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -6778,6 +6766,7 @@ define amdgpu_kernel void @private_system_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -7081,13 +7070,10 @@ define amdgpu_kernel void @private_system_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -7390,13 +7376,10 @@ define amdgpu_kernel void @private_system_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -7698,9 +7681,8 @@ define amdgpu_kernel void @private_system_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8002,6 +7984,7 @@ define amdgpu_kernel void @private_system_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -8305,13 +8288,10 @@ define amdgpu_kernel void @private_system_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8614,13 +8594,10 @@ define amdgpu_kernel void @private_system_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8923,13 +8900,10 @@ define amdgpu_kernel void @private_system_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9232,13 +9206,10 @@ define amdgpu_kernel void @private_system_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9541,9 +9512,8 @@ define amdgpu_kernel void @private_system_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -9848,13 +9818,10 @@ define amdgpu_kernel void @private_system_relese_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10157,13 +10124,10 @@ define amdgpu_kernel void @private_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10466,13 +10430,10 @@ define amdgpu_kernel void @private_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11277,13 +11238,9 @@ define amdgpu_kernel void @private_system_one_as_seq_cst_load(
; GFX1250-NEXT: v_cndmask_b32_e64 v0, s1, v0, s2
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -11466,6 +11423,7 @@ define amdgpu_kernel void @private_system_one_as_unordered_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -11644,6 +11602,7 @@ define amdgpu_kernel void @private_system_one_as_monotonic_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -11823,10 +11782,9 @@ define amdgpu_kernel void @private_system_one_as_release_store(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -12006,10 +11964,9 @@ define amdgpu_kernel void @private_system_one_as_seq_cst_store(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -12188,6 +12145,7 @@ define amdgpu_kernel void @private_system_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -12366,6 +12324,7 @@ define amdgpu_kernel void @private_system_one_as_acquire_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -12547,10 +12506,9 @@ define amdgpu_kernel void @private_system_one_as_release_atomicrmw(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -12730,10 +12688,9 @@ define amdgpu_kernel void @private_system_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -12915,10 +12872,9 @@ define amdgpu_kernel void @private_system_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -13153,6 +13109,7 @@ define amdgpu_kernel void @private_system_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -13391,13 +13348,10 @@ define amdgpu_kernel void @private_system_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -13635,13 +13589,10 @@ define amdgpu_kernel void @private_system_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -13914,6 +13865,7 @@ define amdgpu_kernel void @private_system_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -14183,6 +14135,7 @@ define amdgpu_kernel void @private_system_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -14455,10 +14408,9 @@ define amdgpu_kernel void @private_system_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -14729,10 +14681,9 @@ define amdgpu_kernel void @private_system_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15005,10 +14956,9 @@ define amdgpu_kernel void @private_system_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15280,6 +15230,7 @@ define amdgpu_kernel void @private_system_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15551,6 +15502,7 @@ define amdgpu_kernel void @private_system_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -15823,10 +15775,9 @@ define amdgpu_kernel void @private_system_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -16099,10 +16050,9 @@ define amdgpu_kernel void @private_system_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -16375,10 +16325,9 @@ define amdgpu_kernel void @private_system_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -16651,10 +16600,9 @@ define amdgpu_kernel void @private_system_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -16927,10 +16875,9 @@ define amdgpu_kernel void @private_system_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -17203,10 +17150,9 @@ define amdgpu_kernel void @private_system_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -17479,10 +17425,9 @@ define amdgpu_kernel void @private_system_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -17755,10 +17700,9 @@ define amdgpu_kernel void @private_system_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18058,6 +18002,7 @@ define amdgpu_kernel void @private_system_one_as_monotonic_monotonic_ret_cmpxchg
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -18359,6 +18304,7 @@ define amdgpu_kernel void @private_system_one_as_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -18663,10 +18609,9 @@ define amdgpu_kernel void @private_system_one_as_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -18969,13 +18914,10 @@ define amdgpu_kernel void @private_system_one_as_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -19279,13 +19221,10 @@ define amdgpu_kernel void @private_system_one_as_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -19588,9 +19527,8 @@ define amdgpu_kernel void @private_system_one_as_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -19893,6 +19831,7 @@ define amdgpu_kernel void @private_system_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -20197,13 +20136,10 @@ define amdgpu_kernel void @private_system_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -20507,13 +20443,10 @@ define amdgpu_kernel void @private_system_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -20817,13 +20750,10 @@ define amdgpu_kernel void @private_system_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -21127,13 +21057,10 @@ define amdgpu_kernel void @private_system_one_as_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -21437,10 +21364,9 @@ define amdgpu_kernel void @private_system_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
@@ -21745,13 +21671,10 @@ define amdgpu_kernel void @private_system_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -22055,13 +21978,10 @@ define amdgpu_kernel void @private_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
@@ -22365,13 +22285,10 @@ define amdgpu_kernel void @private_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
-; GFX1250-NEXT: s_wait_bvhcnt 0x0
-; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_dscnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-wavefront.ll
index 8b2254412c0c8..28d9d5dacd9e3 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-wavefront.ll
@@ -984,6 +984,7 @@ define amdgpu_kernel void @private_wavefront_unordered_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1162,6 +1163,7 @@ define amdgpu_kernel void @private_wavefront_monotonic_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1340,6 +1342,7 @@ define amdgpu_kernel void @private_wavefront_release_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1518,6 +1521,7 @@ define amdgpu_kernel void @private_wavefront_seq_cst_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1696,6 +1700,7 @@ define amdgpu_kernel void @private_wavefront_monotonic_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -1874,6 +1879,7 @@ define amdgpu_kernel void @private_wavefront_acquire_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -2052,6 +2058,7 @@ define amdgpu_kernel void @private_wavefront_release_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -2230,6 +2237,7 @@ define amdgpu_kernel void @private_wavefront_acq_rel_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -2408,6 +2416,7 @@ define amdgpu_kernel void @private_wavefront_seq_cst_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -2640,6 +2649,7 @@ define amdgpu_kernel void @private_wavefront_acquire_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -2875,6 +2885,7 @@ define amdgpu_kernel void @private_wavefront_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -3110,6 +3121,7 @@ define amdgpu_kernel void @private_wavefront_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -3381,6 +3393,7 @@ define amdgpu_kernel void @private_wavefront_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -3650,6 +3663,7 @@ define amdgpu_kernel void @private_wavefront_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -3919,6 +3933,7 @@ define amdgpu_kernel void @private_wavefront_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -4188,6 +4203,7 @@ define amdgpu_kernel void @private_wavefront_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -4457,6 +4473,7 @@ define amdgpu_kernel void @private_wavefront_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -4726,6 +4743,7 @@ define amdgpu_kernel void @private_wavefront_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -4995,6 +5013,7 @@ define amdgpu_kernel void @private_wavefront_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -5264,6 +5283,7 @@ define amdgpu_kernel void @private_wavefront_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -5533,6 +5553,7 @@ define amdgpu_kernel void @private_wavefront_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -5802,6 +5823,7 @@ define amdgpu_kernel void @private_wavefront_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -6071,6 +6093,7 @@ define amdgpu_kernel void @private_wavefront_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -6340,6 +6363,7 @@ define amdgpu_kernel void @private_wavefront_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -6609,6 +6633,7 @@ define amdgpu_kernel void @private_wavefront_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -6878,6 +6903,7 @@ define amdgpu_kernel void @private_wavefront_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -7147,6 +7173,7 @@ define amdgpu_kernel void @private_wavefront_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -7444,6 +7471,7 @@ define amdgpu_kernel void @private_wavefront_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -7745,6 +7773,7 @@ define amdgpu_kernel void @private_wavefront_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8046,6 +8075,7 @@ define amdgpu_kernel void @private_wavefront_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8347,6 +8377,7 @@ define amdgpu_kernel void @private_wavefront_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8648,6 +8679,7 @@ define amdgpu_kernel void @private_wavefront_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8949,6 +8981,7 @@ define amdgpu_kernel void @private_wavefront_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9250,6 +9283,7 @@ define amdgpu_kernel void @private_wavefront_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9551,6 +9585,7 @@ define amdgpu_kernel void @private_wavefront_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9852,6 +9887,7 @@ define amdgpu_kernel void @private_wavefront_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10153,6 +10189,7 @@ define amdgpu_kernel void @private_wavefront_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10454,6 +10491,7 @@ define amdgpu_kernel void @private_wavefront_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10755,6 +10793,7 @@ define amdgpu_kernel void @private_wavefront_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11056,6 +11095,7 @@ define amdgpu_kernel void @private_wavefront_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11357,6 +11397,7 @@ define amdgpu_kernel void @private_wavefront_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11658,6 +11699,7 @@ define amdgpu_kernel void @private_wavefront_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -12641,6 +12683,7 @@ define amdgpu_kernel void @private_wavefront_one_as_unordered_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -12819,6 +12862,7 @@ define amdgpu_kernel void @private_wavefront_one_as_monotonic_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -12997,6 +13041,7 @@ define amdgpu_kernel void @private_wavefront_one_as_release_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13175,6 +13220,7 @@ define amdgpu_kernel void @private_wavefront_one_as_seq_cst_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13353,6 +13399,7 @@ define amdgpu_kernel void @private_wavefront_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -13531,6 +13578,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acquire_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -13709,6 +13757,7 @@ define amdgpu_kernel void @private_wavefront_one_as_release_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -13887,6 +13936,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -14065,6 +14115,7 @@ define amdgpu_kernel void @private_wavefront_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -14297,6 +14348,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -14532,6 +14584,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -14767,6 +14820,7 @@ define amdgpu_kernel void @private_wavefront_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -15038,6 +15092,7 @@ define amdgpu_kernel void @private_wavefront_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -15307,6 +15362,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -15576,6 +15632,7 @@ define amdgpu_kernel void @private_wavefront_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -15845,6 +15902,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -16114,6 +16172,7 @@ define amdgpu_kernel void @private_wavefront_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -16383,6 +16442,7 @@ define amdgpu_kernel void @private_wavefront_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -16652,6 +16712,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -16921,6 +16982,7 @@ define amdgpu_kernel void @private_wavefront_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -17190,6 +17252,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -17459,6 +17522,7 @@ define amdgpu_kernel void @private_wavefront_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -17728,6 +17792,7 @@ define amdgpu_kernel void @private_wavefront_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -17997,6 +18062,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -18266,6 +18332,7 @@ define amdgpu_kernel void @private_wavefront_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -18535,6 +18602,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -18804,6 +18872,7 @@ define amdgpu_kernel void @private_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -19101,6 +19170,7 @@ define amdgpu_kernel void @private_wavefront_one_as_monotonic_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -19402,6 +19472,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acquire_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -19703,6 +19774,7 @@ define amdgpu_kernel void @private_wavefront_one_as_release_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20004,6 +20076,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acq_rel_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20305,6 +20378,7 @@ define amdgpu_kernel void @private_wavefront_one_as_seq_cst_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20606,6 +20680,7 @@ define amdgpu_kernel void @private_wavefront_one_as_monotonic_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20907,6 +20982,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -21208,6 +21284,7 @@ define amdgpu_kernel void @private_wavefront_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -21509,6 +21586,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -21810,6 +21888,7 @@ define amdgpu_kernel void @private_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -22111,6 +22190,7 @@ define amdgpu_kernel void @private_wavefront_one_as_monotonic_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -22412,6 +22492,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -22713,6 +22794,7 @@ define amdgpu_kernel void @private_wavefront_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -23014,6 +23096,7 @@ define amdgpu_kernel void @private_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -23315,6 +23398,7 @@ define amdgpu_kernel void @private_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-workgroup.ll
index 127434c365f95..01b2f6835cf7b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-workgroup.ll
@@ -803,7 +803,8 @@ define amdgpu_kernel void @private_workgroup_seq_cst_load(
; GFX1250-NEXT: v_cndmask_b32_e64 v0, s1, v0, s2
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -985,6 +986,7 @@ define amdgpu_kernel void @private_workgroup_unordered_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1163,6 +1165,7 @@ define amdgpu_kernel void @private_workgroup_monotonic_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1341,7 +1344,9 @@ define amdgpu_kernel void @private_workgroup_release_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1520,7 +1525,9 @@ define amdgpu_kernel void @private_workgroup_seq_cst_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -1699,6 +1706,7 @@ define amdgpu_kernel void @private_workgroup_monotonic_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -1877,8 +1885,9 @@ define amdgpu_kernel void @private_workgroup_acquire_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
entry:
@@ -2056,7 +2065,9 @@ define amdgpu_kernel void @private_workgroup_release_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -2235,9 +2246,11 @@ define amdgpu_kernel void @private_workgroup_acq_rel_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
entry:
@@ -2415,9 +2428,11 @@ define amdgpu_kernel void @private_workgroup_seq_cst_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
entry:
@@ -2649,6 +2664,7 @@ define amdgpu_kernel void @private_workgroup_acquire_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -2884,7 +2900,9 @@ define amdgpu_kernel void @private_workgroup_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -3120,7 +3138,9 @@ define amdgpu_kernel void @private_workgroup_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -3392,6 +3412,7 @@ define amdgpu_kernel void @private_workgroup_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -3661,8 +3682,9 @@ define amdgpu_kernel void @private_workgroup_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -3931,7 +3953,9 @@ define amdgpu_kernel void @private_workgroup_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -4201,9 +4225,11 @@ define amdgpu_kernel void @private_workgroup_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -4472,9 +4498,11 @@ define amdgpu_kernel void @private_workgroup_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -4743,8 +4771,9 @@ define amdgpu_kernel void @private_workgroup_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -5013,8 +5042,9 @@ define amdgpu_kernel void @private_workgroup_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -5283,9 +5313,11 @@ define amdgpu_kernel void @private_workgroup_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -5554,9 +5586,11 @@ define amdgpu_kernel void @private_workgroup_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -5825,9 +5859,11 @@ define amdgpu_kernel void @private_workgroup_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -6096,9 +6132,11 @@ define amdgpu_kernel void @private_workgroup_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -6367,9 +6405,11 @@ define amdgpu_kernel void @private_workgroup_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -6638,9 +6678,11 @@ define amdgpu_kernel void @private_workgroup_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -6909,9 +6951,11 @@ define amdgpu_kernel void @private_workgroup_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -7180,9 +7224,11 @@ define amdgpu_kernel void @private_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -7479,6 +7525,7 @@ define amdgpu_kernel void @private_workgroup_monotonic_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -7780,6 +7827,7 @@ define amdgpu_kernel void @private_workgroup_acquire_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8081,7 +8129,9 @@ define amdgpu_kernel void @private_workgroup_release_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8383,7 +8433,9 @@ define amdgpu_kernel void @private_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8685,7 +8737,9 @@ define amdgpu_kernel void @private_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -8987,6 +9041,7 @@ define amdgpu_kernel void @private_workgroup_monotonic_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9288,6 +9343,7 @@ define amdgpu_kernel void @private_workgroup_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9589,7 +9645,9 @@ define amdgpu_kernel void @private_workgroup_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -9891,7 +9949,9 @@ define amdgpu_kernel void @private_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10193,7 +10253,9 @@ define amdgpu_kernel void @private_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10495,7 +10557,9 @@ define amdgpu_kernel void @private_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -10797,7 +10861,9 @@ define amdgpu_kernel void @private_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11099,7 +11165,9 @@ define amdgpu_kernel void @private_workgroup_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11401,7 +11469,9 @@ define amdgpu_kernel void @private_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -11703,7 +11773,9 @@ define amdgpu_kernel void @private_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
-; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -12506,6 +12578,8 @@ define amdgpu_kernel void @private_workgroup_one_as_seq_cst_load(
; GFX1250-NEXT: v_cndmask_b32_e64 v0, s1, v0, s2
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: flat_load_b32 v0, v[0:1]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -12687,6 +12761,7 @@ define amdgpu_kernel void @private_workgroup_one_as_unordered_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -12865,6 +12940,7 @@ define amdgpu_kernel void @private_workgroup_one_as_monotonic_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13043,6 +13119,9 @@ define amdgpu_kernel void @private_workgroup_one_as_release_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13221,6 +13300,9 @@ define amdgpu_kernel void @private_workgroup_one_as_seq_cst_store(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(5) %out) {
@@ -13399,6 +13481,7 @@ define amdgpu_kernel void @private_workgroup_one_as_monotonic_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -13577,7 +13660,9 @@ define amdgpu_kernel void @private_workgroup_one_as_acquire_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
entry:
@@ -13755,6 +13840,9 @@ define amdgpu_kernel void @private_workgroup_one_as_release_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
@@ -13933,7 +14021,11 @@ define amdgpu_kernel void @private_workgroup_one_as_acq_rel_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
entry:
@@ -14111,7 +14203,11 @@ define amdgpu_kernel void @private_workgroup_one_as_seq_cst_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v[0:1], v2
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in) {
entry:
@@ -14343,6 +14439,7 @@ define amdgpu_kernel void @private_workgroup_one_as_acquire_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -14578,6 +14675,9 @@ define amdgpu_kernel void @private_workgroup_one_as_acq_rel_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -14813,6 +14913,9 @@ define amdgpu_kernel void @private_workgroup_one_as_seq_cst_ret_atomicrmw(
; GFX1250-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v1, v2
; GFX1250-NEXT: v_mov_b32_e32 v2, s1
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_swap_b32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -15084,6 +15187,7 @@ define amdgpu_kernel void @private_workgroup_one_as_monotonic_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -15353,7 +15457,9 @@ define amdgpu_kernel void @private_workgroup_one_as_acquire_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -15622,6 +15728,9 @@ define amdgpu_kernel void @private_workgroup_one_as_release_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
@@ -15891,7 +16000,11 @@ define amdgpu_kernel void @private_workgroup_one_as_acq_rel_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -16160,7 +16273,11 @@ define amdgpu_kernel void @private_workgroup_one_as_seq_cst_monotonic_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -16429,7 +16546,9 @@ define amdgpu_kernel void @private_workgroup_one_as_monotonic_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -16698,7 +16817,9 @@ define amdgpu_kernel void @private_workgroup_one_as_acquire_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -16967,7 +17088,11 @@ define amdgpu_kernel void @private_workgroup_one_as_release_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -17236,7 +17361,11 @@ define amdgpu_kernel void @private_workgroup_one_as_acq_rel_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -17505,7 +17634,11 @@ define amdgpu_kernel void @private_workgroup_one_as_seq_cst_acquire_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -17774,7 +17907,11 @@ define amdgpu_kernel void @private_workgroup_one_as_monotonic_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -18043,7 +18180,11 @@ define amdgpu_kernel void @private_workgroup_one_as_acquire_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -18312,7 +18453,11 @@ define amdgpu_kernel void @private_workgroup_one_as_release_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -18581,7 +18726,11 @@ define amdgpu_kernel void @private_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -18850,7 +18999,11 @@ define amdgpu_kernel void @private_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s0
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v[0:1], v[2:3]
+; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(5) %out, i32 %in, i32 %old) {
entry:
@@ -19147,6 +19300,7 @@ define amdgpu_kernel void @private_workgroup_one_as_monotonic_monotonic_ret_cmpx
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -19448,6 +19602,7 @@ define amdgpu_kernel void @private_workgroup_one_as_acquire_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -19749,6 +19904,9 @@ define amdgpu_kernel void @private_workgroup_one_as_release_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20050,6 +20208,9 @@ define amdgpu_kernel void @private_workgroup_one_as_acq_rel_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20351,6 +20512,9 @@ define amdgpu_kernel void @private_workgroup_one_as_seq_cst_monotonic_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20652,6 +20816,7 @@ define amdgpu_kernel void @private_workgroup_one_as_monotonic_acquire_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -20953,6 +21118,7 @@ define amdgpu_kernel void @private_workgroup_one_as_acquire_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -21254,6 +21420,9 @@ define amdgpu_kernel void @private_workgroup_one_as_release_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -21555,6 +21724,9 @@ define amdgpu_kernel void @private_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -21856,6 +22028,9 @@ define amdgpu_kernel void @private_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -22157,6 +22332,9 @@ define amdgpu_kernel void @private_workgroup_one_as_monotonic_seq_cst_ret_cmpxch
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -22458,6 +22636,9 @@ define amdgpu_kernel void @private_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -22759,6 +22940,9 @@ define amdgpu_kernel void @private_workgroup_one_as_release_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -23060,6 +23244,9 @@ define amdgpu_kernel void @private_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
@@ -23361,6 +23548,9 @@ define amdgpu_kernel void @private_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
; GFX1250-NEXT: v_mov_b32_e32 v4, s1
; GFX1250-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX1250-NEXT: v_mov_b32_e32 v3, v4
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_storecnt 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: flat_atomic_cmpswap_b32 v0, v[0:1], v[2:3] th:TH_ATOMIC_RETURN
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SE
More information about the llvm-commits
mailing list