[clang] [llvm] [AMDGPU] Add amdgpu-as MMRA for fences (PR #78572)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 01:46:11 PDT 2024
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/78572
>From a2f665c8d045c8813322dd7522ec28b048d9a1e6 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 17 Jan 2024 11:26:57 +0100
Subject: [PATCH 1/9] [RFC][AMDGPU] Add OpenCL-specific fence address space
masks
Using MMRAs, implement `builtin_amdgcn_fence_opencl` to allow
device libs to emit fences that only target one or more address spaces, instead of fencing all address spaces at once.
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 +
clang/lib/CodeGen/CGBuiltin.cpp | 28 +
clang/lib/CodeGen/CodeGenFunction.h | 2 +
clang/lib/Sema/SemaChecking.cpp | 7 +-
.../builtin-amdgcn-fence-opencl.cpp | 108 ++
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 44 +-
.../memory-legalizer-fence-mmra-global.ll | 1428 +++++++++++++++++
.../memory-legalizer-fence-mmra-local.ll | 1188 ++++++++++++++
.../memory-legalizer-fence-mmra-private.ll | 1188 ++++++++++++++
9 files changed, 3985 insertions(+), 9 deletions(-)
create mode 100644 clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp
create mode 100644 llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 3e21a2fe2ac6b..23b7061255e31 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -69,6 +69,7 @@ BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
+BUILTIN(__builtin_amdgcn_masked_fence, "vUiUicC*", "n")
BUILTIN(__builtin_amdgcn_groupstaticsize, "Ui", "n")
BUILTIN(__builtin_amdgcn_wavefrontsize, "Ui", "nc")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f9ee93049b12d..74a134ca17c73 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -57,6 +57,7 @@
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
+#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -18413,6 +18414,26 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return nullptr;
}
+void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
+ llvm::Value *ASMask) {
+ constexpr const char *Tag = "opencl-fence-mem";
+
+ uint64_t Mask = cast<llvm::ConstantInt>(ASMask)->getZExtValue();
+ if (Mask == 0)
+ return;
+
+ // 3 bits can be set: local, global, image in that order.
+ LLVMContext &Ctx = Inst->getContext();
+ SmallVector<MMRAMetadata::TagT, 3> MMRAs;
+ if (Mask & (1 << 0))
+ MMRAs.push_back({Tag, "local"});
+ if (Mask & (1 << 1))
+ MMRAs.push_back({Tag, "global"});
+ if (Mask & (1 << 2))
+ MMRAs.push_back({Tag, "image"});
+ Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
+}
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -19085,6 +19106,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
EmitScalarExpr(E->getArg(1)), AO, SSID);
return Builder.CreateFence(AO, SSID);
}
+ case AMDGPU::BI__builtin_amdgcn_masked_fence: {
+ ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(1)),
+ EmitScalarExpr(E->getArg(2)), AO, SSID);
+ FenceInst *Fence = Builder.CreateFence(AO, SSID);
+ AddAMDGCNAddressSpaceMMRA(Fence, EmitScalarExpr(E->getArg(0)));
+ return Fence;
+ }
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e1e687af6a781..9a6200ef58882 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4631,6 +4631,8 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
+
+ void AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst, llvm::Value *ASMask);
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
llvm::AtomicOrdering &AO,
llvm::SyncScope::ID &SSID);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 54789dde50691..3a4211b3922c9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5707,6 +5707,10 @@ bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
OrderIndex = 0;
ScopeIndex = 1;
break;
+ case AMDGPU::BI__builtin_amdgcn_masked_fence:
+ OrderIndex = 1;
+ ScopeIndex = 2;
+ break;
default:
return false;
}
@@ -5729,7 +5733,8 @@ bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
switch (static_cast<llvm::AtomicOrderingCABI>(Ord)) {
case llvm::AtomicOrderingCABI::relaxed:
case llvm::AtomicOrderingCABI::consume:
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence)
+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence ||
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_masked_fence)
return Diag(ArgExpr->getBeginLoc(),
diag::warn_atomic_op_has_invalid_memory_order)
<< 0 << ArgExpr->getSourceRange();
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp
new file mode 100644
index 0000000000000..51bf4e7c2e60f
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp
@@ -0,0 +1,108 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
+// RUN: -triple=amdgcn-amd-amdhsa -Qn -mcode-object-version=none | FileCheck %s
+
+#define LOCAL_MASK (1 << 0)
+#define GLOBAL_MASK (1 << 1)
+#define IMAGE_MASK (1 << 2)
+
+//.
+// CHECK: @.str = private unnamed_addr addrspace(4) constant [10 x i8] c"workgroup\00", align 1
+// CHECK: @.str.1 = private unnamed_addr addrspace(4) constant [6 x i8] c"agent\00", align 1
+// CHECK: @.str.2 = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
+//.
+// CHECK-LABEL: define dso_local void @_Z10test_localv(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META1:![0-9]+]]
+// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META1]]
+// CHECK-NEXT: fence seq_cst, !mmra [[META1]]
+// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META1]]
+// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META1]]
+// CHECK-NEXT: ret void
+//
+void test_local() {
+
+ __builtin_amdgcn_masked_fence(LOCAL_MASK, __ATOMIC_SEQ_CST, "workgroup");
+
+ __builtin_amdgcn_masked_fence(LOCAL_MASK,__ATOMIC_ACQUIRE, "agent");
+
+ __builtin_amdgcn_masked_fence(LOCAL_MASK,__ATOMIC_SEQ_CST, "");
+
+ __builtin_amdgcn_masked_fence(LOCAL_MASK, 4, "agent");
+
+ __builtin_amdgcn_masked_fence(LOCAL_MASK, 3, "workgroup");
+}
+
+// CHECK-LABEL: define dso_local void @_Z11test_globalv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META2:![0-9]+]]
+// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META2]]
+// CHECK-NEXT: fence seq_cst, !mmra [[META2]]
+// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META2]]
+// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META2]]
+// CHECK-NEXT: ret void
+//
+void test_global() {
+
+ __builtin_amdgcn_masked_fence(GLOBAL_MASK, __ATOMIC_SEQ_CST, "workgroup");
+
+ __builtin_amdgcn_masked_fence(GLOBAL_MASK,__ATOMIC_ACQUIRE, "agent");
+
+ __builtin_amdgcn_masked_fence(GLOBAL_MASK,__ATOMIC_SEQ_CST, "");
+
+ __builtin_amdgcn_masked_fence(GLOBAL_MASK, 4, "agent");
+
+ __builtin_amdgcn_masked_fence(GLOBAL_MASK, 3, "workgroup");
+}
+
+// CHECK-LABEL: define dso_local void @_Z10test_imagev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
+// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// CHECK-NEXT: fence seq_cst, !mmra [[META2]]
+// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// CHECK-NEXT: ret void
+//
+void test_image() {
+
+ __builtin_amdgcn_masked_fence(IMAGE_MASK, __ATOMIC_SEQ_CST, "workgroup");
+
+ __builtin_amdgcn_masked_fence(IMAGE_MASK,__ATOMIC_ACQUIRE, "agent");
+
+ __builtin_amdgcn_masked_fence(GLOBAL_MASK,__ATOMIC_SEQ_CST, "");
+
+ __builtin_amdgcn_masked_fence(IMAGE_MASK, 4, "agent");
+
+ __builtin_amdgcn_masked_fence(IMAGE_MASK, 3, "workgroup");
+}
+
+// CHECK-LABEL: define dso_local void @_Z10test_mixedv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
+// CHECK-NEXT: ret void
+//
+void test_mixed() {
+
+ __builtin_amdgcn_masked_fence(IMAGE_MASK | GLOBAL_MASK, __ATOMIC_SEQ_CST, "workgroup");
+ __builtin_amdgcn_masked_fence(IMAGE_MASK | GLOBAL_MASK | LOCAL_MASK, __ATOMIC_SEQ_CST, "workgroup");
+
+ __builtin_amdgcn_masked_fence(0xFF,__ATOMIC_SEQ_CST, "workgroup");
+}
+//.
+// CHECK: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1]] = !{!"opencl-fence-mem", !"local"}
+// CHECK: [[META2]] = !{!"opencl-fence-mem", !"global"}
+// CHECK: [[META3]] = !{!"opencl-fence-mem", !"image"}
+// CHECK: [[META4]] = !{[[META2]], [[META3]]}
+// CHECK: [[META5]] = !{[[META1]], [[META2]], [[META3]]}
+//.
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 62306fa667b36..cba398b2be6be 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/TargetParser/TargetParser.h"
@@ -678,6 +679,28 @@ class SIMemoryLegalizer final : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
};
+/// Reads \p MI's MMRAs to parse the "opencl-fence-mem" MMRA.
+/// If this tag isn't present, or if it has no meaningful value, returns \p
+/// Default. Otherwise returns all the address spaces concerned by the MMRA.
+static SIAtomicAddrSpace
+getOpenCLFenceAddrSpaceMMRA(const MachineInstr &MI, SIAtomicAddrSpace Default) {
+ static constexpr const char *Prefix = "opencl-fence-mem";
+
+ auto MMRA = MMRAMetadata(MI.getMMRAMetadata());
+ if (!MMRA)
+ return Default;
+
+ SIAtomicAddrSpace AS = SIAtomicAddrSpace::NONE;
+ if (MMRA.hasTag(Prefix, "global"))
+ AS |= SIAtomicAddrSpace::GLOBAL;
+ if (MMRA.hasTag(Prefix, "local"))
+ AS |= SIAtomicAddrSpace::LDS;
+ if (MMRA.hasTag(Prefix, "image"))
+ AS |= SIAtomicAddrSpace::SCRATCH;
+
+ return (AS != SIAtomicAddrSpace::NONE) ? AS : Default;
+}
+
} // end namespace anonymous
void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
@@ -2535,12 +2558,19 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
AtomicPseudoMIs.push_back(MI);
bool Changed = false;
+ // Refine fenced address space based on MMRAs.
+ //
+ // TODO: Should we support this MMRA on other atomic operations?
+ // Frontend doesn't directly emit those. Can optimizations merge
+ // an atomic load w/ a fence and give us, e.g. load acquire with this MMRA?
+ auto OrderingAddrSpace =
+ getOpenCLFenceAddrSpaceMMRA(*MI, MOI.getOrderingAddrSpace());
+
if (MOI.isAtomic()) {
if (MOI.getOrdering() == AtomicOrdering::Acquire)
- Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
- SIMemOp::LOAD | SIMemOp::STORE,
- MOI.getIsCrossAddressSpaceOrdering(),
- Position::BEFORE);
+ Changed |= CC->insertWait(
+ MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE);
if (MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
@@ -2552,8 +2582,7 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
/// generate a fence. Could add support in this file for
/// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
/// adding S_WAITCNT before a S_BARRIER.
- Changed |= CC->insertRelease(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
+ Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace,
MOI.getIsCrossAddressSpaceOrdering(),
Position::BEFORE);
@@ -2565,8 +2594,7 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
if (MOI.getOrdering() == AtomicOrdering::Acquire ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
- Changed |= CC->insertAcquire(MI, MOI.getScope(),
- MOI.getOrderingAddrSpace(),
+ Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace,
Position::BEFORE);
return Changed;
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
new file mode 100644
index 0000000000000..1ddfab7acc659
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
@@ -0,0 +1,1428 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
+
+define amdgpu_kernel void @workgroup_acquire_fence() {
+; GFX6-LABEL: workgroup_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_release_fence() {
+; GFX6-LABEL: workgroup_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") release, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_acq_rel_fence() {
+; GFX6-LABEL: workgroup_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_seq_cst_fence() {
+; GFX6-LABEL: workgroup_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
+; GFX6-LABEL: workgroup_one_as_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_release_fence() {
+; GFX6-LABEL: workgroup_one_as_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") release, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
+; GFX6-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
+; GFX6-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_acquire_fence() {
+; GFX6-LABEL: agent_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_inv sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_release_fence() {
+; GFX6-LABEL: agent_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") release, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_acq_rel_fence() {
+; GFX6-LABEL: agent_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: buffer_inv sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_seq_cst_fence() {
+; GFX6-LABEL: agent_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: buffer_inv sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_acquire_fence() {
+; GFX6-LABEL: agent_one_as_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_inv sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_release_fence() {
+; GFX6-LABEL: agent_one_as_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") release, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
+; GFX6-LABEL: agent_one_as_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: buffer_inv sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
+; GFX6-LABEL: agent_one_as_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: buffer_inv sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @system_acquire_fence() {
+; GFX6-LABEL: system_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_invl2
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @system_release_fence() {
+; GFX6-LABEL: system_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence release, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @system_acq_rel_fence() {
+; GFX6-LABEL: system_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: buffer_invl2
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @system_seq_cst_fence() {
+; GFX6-LABEL: system_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: buffer_invl2
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_acquire_fence() {
+; GFX6-LABEL: system_one_as_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_invl2
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_release_fence() {
+; GFX6-LABEL: system_one_as_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") release, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_acq_rel_fence() {
+; GFX6-LABEL: system_one_as_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: buffer_invl2
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_seq_cst_fence() {
+; GFX6-LABEL: system_one_as_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: buffer_wbinvl1
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: buffer_wbinvl1_vol
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: buffer_gl1_inv
+; GFX10-WGP-NEXT: buffer_gl0_inv
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: buffer_gl1_inv
+; GFX10-CU-NEXT: buffer_gl0_inv
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
+; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: buffer_invl2
+; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: buffer_gl1_inv
+; GFX11-WGP-NEXT: buffer_gl0_inv
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: buffer_gl1_inv
+; GFX11-CU-NEXT: buffer_gl0_inv
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
new file mode 100644
index 0000000000000..b02b53342e9e8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
@@ -0,0 +1,1188 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
+
+define amdgpu_kernel void @workgroup_acquire_fence() {
+; GFX6-LABEL: workgroup_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_release_fence() {
+; GFX6-LABEL: workgroup_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") release, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_acq_rel_fence() {
+; GFX6-LABEL: workgroup_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_seq_cst_fence() {
+; GFX6-LABEL: workgroup_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
+; GFX6-LABEL: workgroup_one_as_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_release_fence() {
+; GFX6-LABEL: workgroup_one_as_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") release, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
+; GFX6-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
+; GFX6-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_acquire_fence() {
+; GFX6-LABEL: agent_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_release_fence() {
+; GFX6-LABEL: agent_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") release, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_acq_rel_fence() {
+; GFX6-LABEL: agent_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_seq_cst_fence() {
+; GFX6-LABEL: agent_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_acquire_fence() {
+; GFX6-LABEL: agent_one_as_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_release_fence() {
+; GFX6-LABEL: agent_one_as_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") release, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
+; GFX6-LABEL: agent_one_as_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
+; GFX6-LABEL: agent_one_as_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @system_acquire_fence() {
+; GFX6-LABEL: system_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @system_release_fence() {
+; GFX6-LABEL: system_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence release, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @system_acq_rel_fence() {
+; GFX6-LABEL: system_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @system_seq_cst_fence() {
+; GFX6-LABEL: system_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_acquire_fence() {
+; GFX6-LABEL: system_one_as_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_release_fence() {
+; GFX6-LABEL: system_one_as_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") release, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_acq_rel_fence() {
+; GFX6-LABEL: system_one_as_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_seq_cst_fence() {
+; GFX6-LABEL: system_one_as_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll
new file mode 100644
index 0000000000000..0e3c258f52c00
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll
@@ -0,0 +1,1188 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
+
+define amdgpu_kernel void @workgroup_acquire_fence() {
+; GFX6-LABEL: workgroup_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_release_fence() {
+; GFX6-LABEL: workgroup_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") release, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_acq_rel_fence() {
+; GFX6-LABEL: workgroup_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_seq_cst_fence() {
+; GFX6-LABEL: workgroup_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup") seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
+; GFX6-LABEL: workgroup_one_as_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_release_fence() {
+; GFX6-LABEL: workgroup_one_as_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") release, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
+; GFX6-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
+; GFX6-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: workgroup_one_as_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_acquire_fence() {
+; GFX6-LABEL: agent_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_release_fence() {
+; GFX6-LABEL: agent_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") release, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_acq_rel_fence() {
+; GFX6-LABEL: agent_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_seq_cst_fence() {
+; GFX6-LABEL: agent_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent") seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_acquire_fence() {
+; GFX6-LABEL: agent_one_as_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_release_fence() {
+; GFX6-LABEL: agent_one_as_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") release, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
+; GFX6-LABEL: agent_one_as_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
+; GFX6-LABEL: agent_one_as_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: agent_one_as_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: agent_one_as_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: agent_one_as_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: agent_one_as_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("agent-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @system_acquire_fence() {
+; GFX6-LABEL: system_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @system_release_fence() {
+; GFX6-LABEL: system_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence release, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @system_acq_rel_fence() {
+; GFX6-LABEL: system_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @system_seq_cst_fence() {
+; GFX6-LABEL: system_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_acquire_fence() {
+; GFX6-LABEL: system_one_as_acquire_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_acquire_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_acquire_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_acquire_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_acquire_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_acquire_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_acquire_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_release_fence() {
+; GFX6-LABEL: system_one_as_release_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_release_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_release_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_release_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_release_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_release_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_release_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_release_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_release_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_release_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") release, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_acq_rel_fence() {
+; GFX6-LABEL: system_one_as_acq_rel_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_acq_rel_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_acq_rel_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_acq_rel_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_acq_rel_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_acq_rel_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_acq_rel_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
+
+define amdgpu_kernel void @system_one_as_seq_cst_fence() {
+; GFX6-LABEL: system_one_as_seq_cst_fence:
+; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: system_one_as_seq_cst_fence:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_endpgm
+;
+; GFX10-WGP-LABEL: system_one_as_seq_cst_fence:
+; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_endpgm
+;
+; GFX10-CU-LABEL: system_one_as_seq_cst_fence:
+; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_endpgm
+;
+; SKIP-CACHE-INV-LABEL: system_one_as_seq_cst_fence:
+; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_endpgm
+;
+; GFX90A-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX90A-TGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_endpgm
+;
+; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_endpgm
+;
+; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence:
+; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_endpgm
+;
+; GFX11-WGP-LABEL: system_one_as_seq_cst_fence:
+; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_endpgm
+;
+; GFX11-CU-LABEL: system_one_as_seq_cst_fence:
+; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_endpgm
+entry:
+ fence syncscope("one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ ret void
+}
>From 3914434c8d7cb044244aeafa864f694dc7e17a17 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Fri, 26 Apr 2024 10:38:45 +0200
Subject: [PATCH 2/9] add docs
---
clang/docs/LanguageExtensions.rst | 55 +++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index a09c409f8f91a..946a9cba84ae4 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4403,6 +4403,7 @@ immediately after the name being declared.
For example, this applies the GNU ``unused`` attribute to ``a`` and ``f``, and
also applies the GNU ``noreturn`` attribute to ``f``.
+Examples:
.. code-block:: c++
[[gnu::unused]] int a, f [[gnu::noreturn]] ();
@@ -4412,6 +4413,60 @@ Target-Specific Extensions
Clang supports some language features conditionally on some targets.
+AMDGPU Language Extensions
+--------------------------
+
+__builtin_amdgcn_fence
+^^^^^^^^^^^^^^^^^^^^^^
+
+``__builtin_amdgcn_fence`` emits a fence for all address spaces
+and takes the following arguments:
+
+* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
+* ``const char *`` synchronization scope, e.g. ``workgroup``
+
+.. code-block:: c++
+
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
+
+__builtin_amdgcn_masked_fence
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``__builtin_amdgcn_masked_fence`` emits a fence for one or more address
+spaces.
+
+* ``unsigned`` address space mask
+* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
+* ``const char *`` synchronization scope, e.g. ``workgroup``
+
+The address space mask is a bitmask and each bit corresponds
+to an OpenCL memory type:
+
+* ``0x1`` (first bit) is for local memory.
+* ``0x2`` (second bit) is for global memory.
+* ``0x4`` (third bit) is for image memory.
+
+A bitmask of zero causes this fence to behave like
+``__builtin_amdgcn_fence``.
+Different values can of course be OR'd together. Examples:
+
+.. code-block:: c++
+
+ // Fence local only
+ __builtin_amdgcn_masked_fence(0x1, __ATOMIC_SEQ_CST, "workgroup")
+
+ // Fence local & global
+ __builtin_amdgcn_masked_fence(0x3, __ATOMIC_SEQ_CST, "workgroup")
+
+Note that this fence may affect more than just the address spaces
+specified. In some cases, the address space mask may
+be lost during optimization and a normal fence for all address
+spaces (``__builtin_amdgcn_fence``) will be emitted instead.
+This generally happens if the address space mask restricts an
+optimization, and the performance cost of skipping that optimization
+is greater than the cost of performing a more expensive fence operation.
+
ARM/AArch64 Language Extensions
-------------------------------
>From a5eb7ee0a7fcf01a373fe3ea0f58dd0eff0eb090 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 2 May 2024 13:48:07 +0200
Subject: [PATCH 3/9] comments
---
clang/docs/LanguageExtensions.rst | 32 +++++-----
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 +-
clang/lib/CodeGen/CGBuiltin.cpp | 36 ++++++------
clang/lib/CodeGen/CodeGenFunction.h | 2 +-
clang/lib/Sema/SemaChecking.cpp | 5 +-
.../builtin-amdgcn-fence-opencl.cpp | 58 ++++++++-----------
6 files changed, 58 insertions(+), 77 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 946a9cba84ae4..1f6692ebe2c75 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4434,38 +4434,32 @@ __builtin_amdgcn_masked_fence
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``__builtin_amdgcn_masked_fence`` emits a fence for one or more address
-spaces.
+spaces and takes the following arguments:
-* ``unsigned`` address space mask
* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
* ``const char *`` synchronization scope, e.g. ``workgroup``
+* Zero or more ``const char *`` address spaces.
-The address space mask is a bitmask and each bit corresponds
-to an OpenCL memory type:
+The address spaces arguments must be string literals with known values, such as:
-* ``0x1`` (first bit) is for local memory.
-* ``0x2`` (second bit) is for global memory.
-* ``0x4`` (third bit) is for image memory.
+* ``"local"``
+* ``"global"``
+* ``"image"``
-A bitmask of zero causes this fence to behave like
+If there are no address spaces specified, this fence behaves like
``__builtin_amdgcn_fence``.
-Different values can of course be OR'd together. Examples:
-.. code-block:: c++
+Examples:
- // Fence local only
- __builtin_amdgcn_masked_fence(0x1, __ATOMIC_SEQ_CST, "workgroup")
+.. code-block:: c++
- // Fence local & global
- __builtin_amdgcn_masked_fence(0x3, __ATOMIC_SEQ_CST, "workgroup")
+ __builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
+ __builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global")
Note that this fence may affect more than just the address spaces
-specified. In some cases, the address space mask may
+specified; in some cases, the address space mask may
be lost during optimization and a normal fence for all address
spaces (``__builtin_amdgcn_fence``) will be emitted instead.
-This generally happens if the address space mask restricts an
-optimization, and the performance cost of skipping that optimization
-is greater than the cost of performing a more expensive fence operation.
ARM/AArch64 Language Extensions
-------------------------------
@@ -5657,4 +5651,4 @@ Compiling different TUs depending on these flags (including use of
``std::hardware_constructive_interference`` or
``std::hardware_destructive_interference``) with different compilers, macro
definitions, or architecture flags will lead to ODR violations and should be
-avoided.
\ No newline at end of file
+avoided.
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 23b7061255e31..c1f53b1c9fbfe 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -69,7 +69,7 @@ BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
-BUILTIN(__builtin_amdgcn_masked_fence, "vUiUicC*", "n")
+BUILTIN(__builtin_amdgcn_masked_fence, "vUicC*.", "n")
BUILTIN(__builtin_amdgcn_groupstaticsize, "Ui", "n")
BUILTIN(__builtin_amdgcn_wavefrontsize, "Ui", "nc")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 74a134ca17c73..d5927fb42c160 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18415,22 +18415,23 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
}
void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
- llvm::Value *ASMask) {
+ const CallExpr *E, unsigned FirstASNameIdx) {
constexpr const char *Tag = "opencl-fence-mem";
- uint64_t Mask = cast<llvm::ConstantInt>(ASMask)->getZExtValue();
- if (Mask == 0)
- return;
-
- // 3 bits can be set: local, global, image in that order.
LLVMContext &Ctx = Inst->getContext();
SmallVector<MMRAMetadata::TagT, 3> MMRAs;
- if (Mask & (1 << 0))
- MMRAs.push_back({Tag, "local"});
- if (Mask & (1 << 1))
- MMRAs.push_back({Tag, "global"});
- if (Mask & (1 << 2))
- MMRAs.push_back({Tag, "image"});
+ for(unsigned K = FirstASNameIdx; K < E->getNumArgs(); ++K) {
+ llvm::Value *V = EmitScalarExpr(E->getArg(K));
+ StringRef AS;
+ if(llvm::getConstantStringInfo(V, AS) && (AS == "local" || AS == "global" || AS == "image")) {
+ MMRAs.push_back({Tag, AS});
+ // TODO: Delete the resulting unused constant?
+ continue;
+ }
+ CGM.Error(E->getExprLoc(), "expected one of \"local\", \"global\" or \"image\"");
+ }
+
+ llvm::unique(MMRAs);
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}
@@ -19101,16 +19102,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
return Builder.CreateCall(F, { Src0, Src1, Src2 });
}
- case AMDGPU::BI__builtin_amdgcn_fence: {
+ case AMDGPU::BI__builtin_amdgcn_fence:
+ case AMDGPU::BI__builtin_amdgcn_masked_fence: {
ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
EmitScalarExpr(E->getArg(1)), AO, SSID);
- return Builder.CreateFence(AO, SSID);
- }
- case AMDGPU::BI__builtin_amdgcn_masked_fence: {
- ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(1)),
- EmitScalarExpr(E->getArg(2)), AO, SSID);
FenceInst *Fence = Builder.CreateFence(AO, SSID);
- AddAMDGCNAddressSpaceMMRA(Fence, EmitScalarExpr(E->getArg(0)));
+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_masked_fence && E->getNumArgs() > 2)
+ AddAMDGCNAddressSpaceMMRA(Fence, E, 2);
return Fence;
}
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 9a6200ef58882..0a17884d3d3b3 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4632,7 +4632,7 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
- void AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst, llvm::Value *ASMask);
+ void AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E, unsigned FirstASNameIdx);
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
llvm::AtomicOrdering &AO,
llvm::SyncScope::ID &SSID);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 3a4211b3922c9..8c1770aa6b8ea 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5704,13 +5704,10 @@ bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
ScopeIndex = 3;
break;
case AMDGPU::BI__builtin_amdgcn_fence:
+ case AMDGPU::BI__builtin_amdgcn_masked_fence:
OrderIndex = 0;
ScopeIndex = 1;
break;
- case AMDGPU::BI__builtin_amdgcn_masked_fence:
- OrderIndex = 1;
- ScopeIndex = 2;
- break;
default:
return false;
}
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp
index 51bf4e7c2e60f..406eebd89c3de 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp
@@ -3,14 +3,13 @@
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
// RUN: -triple=amdgcn-amd-amdhsa -Qn -mcode-object-version=none | FileCheck %s
-#define LOCAL_MASK (1 << 0)
-#define GLOBAL_MASK (1 << 1)
-#define IMAGE_MASK (1 << 2)
-
//.
// CHECK: @.str = private unnamed_addr addrspace(4) constant [10 x i8] c"workgroup\00", align 1
-// CHECK: @.str.1 = private unnamed_addr addrspace(4) constant [6 x i8] c"agent\00", align 1
-// CHECK: @.str.2 = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
+// CHECK: @.str.1 = private unnamed_addr addrspace(4) constant [6 x i8] c"local\00", align 1
+// CHECK: @.str.2 = private unnamed_addr addrspace(4) constant [6 x i8] c"agent\00", align 1
+// CHECK: @.str.3 = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
+// CHECK: @.str.4 = private unnamed_addr addrspace(4) constant [7 x i8] c"global\00", align 1
+// CHECK: @.str.5 = private unnamed_addr addrspace(4) constant [6 x i8] c"image\00", align 1
//.
// CHECK-LABEL: define dso_local void @_Z10test_localv(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -23,16 +22,15 @@
// CHECK-NEXT: ret void
//
void test_local() {
+ __builtin_amdgcn_masked_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
- __builtin_amdgcn_masked_fence(LOCAL_MASK, __ATOMIC_SEQ_CST, "workgroup");
-
- __builtin_amdgcn_masked_fence(LOCAL_MASK,__ATOMIC_ACQUIRE, "agent");
+ __builtin_amdgcn_masked_fence(__ATOMIC_ACQUIRE, "agent", "local");
- __builtin_amdgcn_masked_fence(LOCAL_MASK,__ATOMIC_SEQ_CST, "");
+ __builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "", "local");
- __builtin_amdgcn_masked_fence(LOCAL_MASK, 4, "agent");
+ __builtin_amdgcn_masked_fence(4, "agent", "local");
- __builtin_amdgcn_masked_fence(LOCAL_MASK, 3, "workgroup");
+ __builtin_amdgcn_masked_fence(3, "workgroup", "local");
}
// CHECK-LABEL: define dso_local void @_Z11test_globalv(
@@ -46,16 +44,15 @@ void test_local() {
// CHECK-NEXT: ret void
//
void test_global() {
+ __builtin_amdgcn_masked_fence( __ATOMIC_SEQ_CST, "workgroup", "global");
- __builtin_amdgcn_masked_fence(GLOBAL_MASK, __ATOMIC_SEQ_CST, "workgroup");
+ __builtin_amdgcn_masked_fence(__ATOMIC_ACQUIRE, "agent", "global");
- __builtin_amdgcn_masked_fence(GLOBAL_MASK,__ATOMIC_ACQUIRE, "agent");
+ __builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "", "global");
- __builtin_amdgcn_masked_fence(GLOBAL_MASK,__ATOMIC_SEQ_CST, "");
+ __builtin_amdgcn_masked_fence(4, "agent", "global");
- __builtin_amdgcn_masked_fence(GLOBAL_MASK, 4, "agent");
-
- __builtin_amdgcn_masked_fence(GLOBAL_MASK, 3, "workgroup");
+ __builtin_amdgcn_masked_fence(3, "workgroup", "global");
}
// CHECK-LABEL: define dso_local void @_Z10test_imagev(
@@ -63,22 +60,21 @@ void test_global() {
// CHECK-NEXT: entry:
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
-// CHECK-NEXT: fence seq_cst, !mmra [[META2]]
+// CHECK-NEXT: fence seq_cst, !mmra [[META3]]
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
// CHECK-NEXT: ret void
//
void test_image() {
+ __builtin_amdgcn_masked_fence( __ATOMIC_SEQ_CST, "workgroup", "image");
- __builtin_amdgcn_masked_fence(IMAGE_MASK, __ATOMIC_SEQ_CST, "workgroup");
+ __builtin_amdgcn_masked_fence(__ATOMIC_ACQUIRE, "agent", "image");
- __builtin_amdgcn_masked_fence(IMAGE_MASK,__ATOMIC_ACQUIRE, "agent");
+ __builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "", "image");
- __builtin_amdgcn_masked_fence(GLOBAL_MASK,__ATOMIC_SEQ_CST, "");
+ __builtin_amdgcn_masked_fence(4, "agent", "image");
- __builtin_amdgcn_masked_fence(IMAGE_MASK, 4, "agent");
-
- __builtin_amdgcn_masked_fence(IMAGE_MASK, 3, "workgroup");
+ __builtin_amdgcn_masked_fence(3, "workgroup", "image");
}
// CHECK-LABEL: define dso_local void @_Z10test_mixedv(
@@ -86,15 +82,11 @@ void test_image() {
// CHECK-NEXT: entry:
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
-// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
// CHECK-NEXT: ret void
//
void test_mixed() {
-
- __builtin_amdgcn_masked_fence(IMAGE_MASK | GLOBAL_MASK, __ATOMIC_SEQ_CST, "workgroup");
- __builtin_amdgcn_masked_fence(IMAGE_MASK | GLOBAL_MASK | LOCAL_MASK, __ATOMIC_SEQ_CST, "workgroup");
-
- __builtin_amdgcn_masked_fence(0xFF,__ATOMIC_SEQ_CST, "workgroup");
+ __builtin_amdgcn_masked_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "global");
+ __builtin_amdgcn_masked_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "local", "global");
}
//.
// CHECK: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
@@ -103,6 +95,6 @@ void test_mixed() {
// CHECK: [[META1]] = !{!"opencl-fence-mem", !"local"}
// CHECK: [[META2]] = !{!"opencl-fence-mem", !"global"}
// CHECK: [[META3]] = !{!"opencl-fence-mem", !"image"}
-// CHECK: [[META4]] = !{[[META2]], [[META3]]}
-// CHECK: [[META5]] = !{[[META1]], [[META2]], [[META3]]}
+// CHECK: [[META4]] = !{[[META3]], [[META2]]}
+// CHECK: [[META5]] = !{[[META3]], [[META1]], [[META2]]}
//.
>From a60856395a2ff3e8ba921d38aa35aeacd97199b5 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 2 May 2024 13:51:39 +0200
Subject: [PATCH 4/9] fmt
---
clang/lib/CodeGen/CGBuiltin.cpp | 14 +++++++++-----
clang/lib/CodeGen/CodeGenFunction.h | 3 ++-
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d5927fb42c160..87e573330f910 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18415,20 +18415,23 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
}
void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
- const CallExpr *E, unsigned FirstASNameIdx) {
+ const CallExpr *E,
+ unsigned FirstASNameIdx) {
constexpr const char *Tag = "opencl-fence-mem";
LLVMContext &Ctx = Inst->getContext();
SmallVector<MMRAMetadata::TagT, 3> MMRAs;
- for(unsigned K = FirstASNameIdx; K < E->getNumArgs(); ++K) {
+ for (unsigned K = FirstASNameIdx; K < E->getNumArgs(); ++K) {
llvm::Value *V = EmitScalarExpr(E->getArg(K));
StringRef AS;
- if(llvm::getConstantStringInfo(V, AS) && (AS == "local" || AS == "global" || AS == "image")) {
+ if (llvm::getConstantStringInfo(V, AS) &&
+ (AS == "local" || AS == "global" || AS == "image")) {
MMRAs.push_back({Tag, AS});
// TODO: Delete the resulting unused constant?
continue;
}
- CGM.Error(E->getExprLoc(), "expected one of \"local\", \"global\" or \"image\"");
+ CGM.Error(E->getExprLoc(),
+ "expected one of \"local\", \"global\" or \"image\"");
}
llvm::unique(MMRAs);
@@ -19107,7 +19110,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
EmitScalarExpr(E->getArg(1)), AO, SSID);
FenceInst *Fence = Builder.CreateFence(AO, SSID);
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_masked_fence && E->getNumArgs() > 2)
+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_masked_fence &&
+ E->getNumArgs() > 2)
AddAMDGCNAddressSpaceMMRA(Fence, E, 2);
return Fence;
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 0a17884d3d3b3..71ed8fc9d4ecd 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4632,7 +4632,8 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
- void AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E, unsigned FirstASNameIdx);
+ void AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E,
+ unsigned FirstASNameIdx);
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
llvm::AtomicOrdering &AO,
llvm::SyncScope::ID &SSID);
>From d8a5a97d5bf0aad0af514a86208b3e182748347c Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Fri, 3 May 2024 10:19:02 +0200
Subject: [PATCH 5/9] Refactor
---
clang/docs/LanguageExtensions.rst | 40 +++----
clang/include/clang/Basic/BuiltinsAMDGPU.def | 3 +-
clang/lib/CodeGen/CGBuiltin.cpp | 17 ++-
clang/lib/CodeGen/CodeGenFunction.h | 4 +-
clang/lib/Sema/SemaChecking.cpp | 4 +-
.../builtin-amdgcn-fence-opencl.cpp | 100 -----------------
.../test/CodeGenCXX/builtin-amdgcn-fence.cpp | 105 ++++++++++++++++--
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 61 +++++++---
.../memory-legalizer-fence-mmra-global.ll | 48 ++++----
.../memory-legalizer-fence-mmra-local.ll | 48 ++++----
.../memory-legalizer-fence-mmra-private.ll | 48 ++++----
11 files changed, 240 insertions(+), 238 deletions(-)
delete mode 100644 clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 1f6692ebe2c75..8c54452ab0355 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4419,26 +4419,11 @@ AMDGPU Language Extensions
__builtin_amdgcn_fence
^^^^^^^^^^^^^^^^^^^^^^
-``__builtin_amdgcn_fence`` emits a fence for all address spaces
-and takes the following arguments:
+``__builtin_amdgcn_fence`` emits a fence.
* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
* ``const char *`` synchronization scope, e.g. ``workgroup``
-
-.. code-block:: c++
-
- __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
- __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
-
-__builtin_amdgcn_masked_fence
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-``__builtin_amdgcn_masked_fence`` emits a fence for one or more address
-spaces and takes the following arguments:
-
-* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
-* ``const char *`` synchronization scope, e.g. ``workgroup``
-* Zero or more ``const char *`` address spaces.
+* Zero or more ``const char *`` address spaces names.
The address spaces arguments must be string literals with known values, such as:
@@ -4446,20 +4431,23 @@ The address spaces arguments must be string literals with known values, such as:
* ``"global"``
* ``"image"``
-If there are no address spaces specified, this fence behaves like
-``__builtin_amdgcn_fence``.
+If one or more address space name are provided, the code generator will attempt
+to emit potentially faster instructions that only fence those address spaces.
+Emitting such instructions may not always be possible and the compiler is free
+to fence more aggressively.
-Examples:
+If no address spaces names are provided, all address spaces are fenced.
.. code-block:: c++
- __builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
- __builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global")
+ // Fence all address spaces.
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
+
+ // Fence only requested address spaces.
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global")
-Note that this fence may affect more than just the address spaces
-specified; in some cases, the address space mask may
-be lost during optimization and a normal fence for all address
-spaces (``__builtin_amdgcn_fence``) will be emitted instead.
ARM/AArch64 Language Extensions
-------------------------------
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index c1f53b1c9fbfe..8c6afc41c7307 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -68,8 +68,7 @@ BUILTIN(__builtin_amdgcn_sched_group_barrier, "vIiIiIi", "n")
BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
-BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
-BUILTIN(__builtin_amdgcn_masked_fence, "vUicC*.", "n")
+BUILTIN(__builtin_amdgcn_fence, "vUicC*.", "n")
BUILTIN(__builtin_amdgcn_groupstaticsize, "Ui", "n")
BUILTIN(__builtin_amdgcn_wavefrontsize, "Ui", "nc")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 87e573330f910..e847a30ed4bbc 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18414,14 +18414,13 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return nullptr;
}
-void CodeGenFunction::AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst,
- const CallExpr *E,
- unsigned FirstASNameIdx) {
- constexpr const char *Tag = "opencl-fence-mem";
+void CodeGenFunction::AddAMDGCNFenceAddressSpaceMMRA(llvm::Instruction *Inst,
+ const CallExpr *E) {
+ constexpr const char *Tag = "amdgpu-as";
LLVMContext &Ctx = Inst->getContext();
SmallVector<MMRAMetadata::TagT, 3> MMRAs;
- for (unsigned K = FirstASNameIdx; K < E->getNumArgs(); ++K) {
+ for (unsigned K = 2; K < E->getNumArgs(); ++K) {
llvm::Value *V = EmitScalarExpr(E->getArg(K));
StringRef AS;
if (llvm::getConstantStringInfo(V, AS) &&
@@ -19105,14 +19104,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
return Builder.CreateCall(F, { Src0, Src1, Src2 });
}
- case AMDGPU::BI__builtin_amdgcn_fence:
- case AMDGPU::BI__builtin_amdgcn_masked_fence: {
+ case AMDGPU::BI__builtin_amdgcn_fence: {
ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
EmitScalarExpr(E->getArg(1)), AO, SSID);
FenceInst *Fence = Builder.CreateFence(AO, SSID);
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_masked_fence &&
- E->getNumArgs() > 2)
- AddAMDGCNAddressSpaceMMRA(Fence, E, 2);
+ if (E->getNumArgs() > 2)
+ AddAMDGCNFenceAddressSpaceMMRA(Fence, E);
return Fence;
}
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 71ed8fc9d4ecd..1a6610146b228 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4632,8 +4632,8 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
- void AddAMDGCNAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E,
- unsigned FirstASNameIdx);
+ void AddAMDGCNFenceAddressSpaceMMRA(llvm::Instruction *Inst,
+ const CallExpr *E);
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
llvm::AtomicOrdering &AO,
llvm::SyncScope::ID &SSID);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 8c1770aa6b8ea..54789dde50691 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5704,7 +5704,6 @@ bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
ScopeIndex = 3;
break;
case AMDGPU::BI__builtin_amdgcn_fence:
- case AMDGPU::BI__builtin_amdgcn_masked_fence:
OrderIndex = 0;
ScopeIndex = 1;
break;
@@ -5730,8 +5729,7 @@ bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
switch (static_cast<llvm::AtomicOrderingCABI>(Ord)) {
case llvm::AtomicOrderingCABI::relaxed:
case llvm::AtomicOrderingCABI::consume:
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence ||
- BuiltinID == AMDGPU::BI__builtin_amdgcn_masked_fence)
+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_fence)
return Diag(ArgExpr->getBeginLoc(),
diag::warn_atomic_op_has_invalid_memory_order)
<< 0 << ArgExpr->getSourceRange();
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp
deleted file mode 100644
index 406eebd89c3de..0000000000000
--- a/clang/test/CodeGenCXX/builtin-amdgcn-fence-opencl.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3
-// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa -Qn -mcode-object-version=none | FileCheck %s
-
-//.
-// CHECK: @.str = private unnamed_addr addrspace(4) constant [10 x i8] c"workgroup\00", align 1
-// CHECK: @.str.1 = private unnamed_addr addrspace(4) constant [6 x i8] c"local\00", align 1
-// CHECK: @.str.2 = private unnamed_addr addrspace(4) constant [6 x i8] c"agent\00", align 1
-// CHECK: @.str.3 = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
-// CHECK: @.str.4 = private unnamed_addr addrspace(4) constant [7 x i8] c"global\00", align 1
-// CHECK: @.str.5 = private unnamed_addr addrspace(4) constant [6 x i8] c"image\00", align 1
-//.
-// CHECK-LABEL: define dso_local void @_Z10test_localv(
-// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META1:![0-9]+]]
-// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META1]]
-// CHECK-NEXT: fence seq_cst, !mmra [[META1]]
-// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META1]]
-// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META1]]
-// CHECK-NEXT: ret void
-//
-void test_local() {
- __builtin_amdgcn_masked_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
-
- __builtin_amdgcn_masked_fence(__ATOMIC_ACQUIRE, "agent", "local");
-
- __builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "", "local");
-
- __builtin_amdgcn_masked_fence(4, "agent", "local");
-
- __builtin_amdgcn_masked_fence(3, "workgroup", "local");
-}
-
-// CHECK-LABEL: define dso_local void @_Z11test_globalv(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META2:![0-9]+]]
-// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META2]]
-// CHECK-NEXT: fence seq_cst, !mmra [[META2]]
-// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META2]]
-// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META2]]
-// CHECK-NEXT: ret void
-//
-void test_global() {
- __builtin_amdgcn_masked_fence( __ATOMIC_SEQ_CST, "workgroup", "global");
-
- __builtin_amdgcn_masked_fence(__ATOMIC_ACQUIRE, "agent", "global");
-
- __builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "", "global");
-
- __builtin_amdgcn_masked_fence(4, "agent", "global");
-
- __builtin_amdgcn_masked_fence(3, "workgroup", "global");
-}
-
-// CHECK-LABEL: define dso_local void @_Z10test_imagev(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
-// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
-// CHECK-NEXT: fence seq_cst, !mmra [[META3]]
-// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
-// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
-// CHECK-NEXT: ret void
-//
-void test_image() {
- __builtin_amdgcn_masked_fence( __ATOMIC_SEQ_CST, "workgroup", "image");
-
- __builtin_amdgcn_masked_fence(__ATOMIC_ACQUIRE, "agent", "image");
-
- __builtin_amdgcn_masked_fence(__ATOMIC_SEQ_CST, "", "image");
-
- __builtin_amdgcn_masked_fence(4, "agent", "image");
-
- __builtin_amdgcn_masked_fence(3, "workgroup", "image");
-}
-
-// CHECK-LABEL: define dso_local void @_Z10test_mixedv(
-// CHECK-SAME: ) #[[ATTR0]] {
-// CHECK-NEXT: entry:
-// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
-// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
-// CHECK-NEXT: ret void
-//
-void test_mixed() {
- __builtin_amdgcn_masked_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "global");
- __builtin_amdgcn_masked_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "local", "global");
-}
-//.
-// CHECK: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-//.
-// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-// CHECK: [[META1]] = !{!"opencl-fence-mem", !"local"}
-// CHECK: [[META2]] = !{!"opencl-fence-mem", !"global"}
-// CHECK: [[META3]] = !{!"opencl-fence-mem", !"image"}
-// CHECK: [[META4]] = !{[[META3]], [[META2]]}
-// CHECK: [[META5]] = !{[[META3]], [[META1]], [[META2]]}
-//.
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
index 630e416b893f4..6697ad65d5c6e 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -1,22 +1,113 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+// CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst
+// CHECK-NEXT: fence syncscope("agent") acquire
+// CHECK-NEXT: fence seq_cst
+// CHECK-NEXT: fence syncscope("agent") acq_rel
+// CHECK-NEXT: fence syncscope("workgroup") release
+// CHECK-NEXT: ret void
+//
void test_memory_fence_success() {
- // CHECK-LABEL: test_memory_fence_success
- // CHECK: fence syncscope("workgroup") seq_cst
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
- // CHECK: fence syncscope("agent") acquire
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
- // CHECK: fence seq_cst
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
- // CHECK: fence syncscope("agent") acq_rel
__builtin_amdgcn_fence(4, "agent");
- // CHECK: fence syncscope("workgroup") release
__builtin_amdgcn_fence(3, "workgroup");
}
+
+// CHECK-LABEL: define dso_local void @_Z10test_localv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
+// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// CHECK-NEXT: fence seq_cst, !mmra [[META3]]
+// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// CHECK-NEXT: ret void
+//
+void test_local() {
+ __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
+
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "local");
+
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "local");
+
+ __builtin_amdgcn_fence(4, "agent", "local");
+
+ __builtin_amdgcn_fence(3, "workgroup", "local");
+}
+
+
+// CHECK-LABEL: define dso_local void @_Z11test_globalv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
+// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META4]]
+// CHECK-NEXT: fence seq_cst, !mmra [[META4]]
+// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
+// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
+// CHECK-NEXT: ret void
+//
+void test_global() {
+ __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global");
+
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "global");
+
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "global");
+
+ __builtin_amdgcn_fence(4, "agent", "global");
+
+ __builtin_amdgcn_fence(3, "workgroup", "global");
+}
+
+// CHECK-LABEL: define dso_local void @_Z10test_imagev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META5]]
+// CHECK-NEXT: fence seq_cst, !mmra [[META5]]
+// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META5]]
+// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META5]]
+// CHECK-NEXT: ret void
+//
+void test_image() {
+ __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image");
+
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "image");
+
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "image");
+
+ __builtin_amdgcn_fence(4, "agent", "image");
+
+ __builtin_amdgcn_fence(3, "workgroup", "image");
+}
+
+// CHECK-LABEL: define dso_local void @_Z10test_mixedv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META6:![0-9]+]]
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META7:![0-9]+]]
+// CHECK-NEXT: ret void
+//
+void test_mixed() {
+ __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "global");
+ __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "local", "global");
+}
+//.
+// CHECK: [[META3]] = !{!"amdgpu-as", !"local"}
+// CHECK: [[META4]] = !{!"amdgpu-as", !"global"}
+// CHECK: [[META5]] = !{!"amdgpu-as", !"image"}
+// CHECK: [[META6]] = !{[[META5]], [[META4]]}
+// CHECK: [[META7]] = !{[[META5]], [[META3]], [[META4]]}
+//.
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index cba398b2be6be..0aafeb50297b6 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -679,26 +679,57 @@ class SIMemoryLegalizer final : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
};
-/// Reads \p MI's MMRAs to parse the "opencl-fence-mem" MMRA.
-/// If this tag isn't present, or if it has no meaningful value, returns \p
+static std::array<std::pair<StringLiteral, SIAtomicAddrSpace>, 3> ASNames = {{
+ {"global", SIAtomicAddrSpace::GLOBAL},
+ {"local", SIAtomicAddrSpace::LDS},
+ {"image", SIAtomicAddrSpace::SCRATCH},
+}};
+
+void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
+ const MachineFunction *MF = MI.getMF();
+ const Function &Fn = MF->getFunction();
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "unknown address space '" << AS << "'; expected one of ";
+ bool IsFirst = true;
+ for (const auto &[Name, Val] : ASNames) {
+ if (IsFirst)
+ IsFirst = false;
+ else
+ OS << ", ";
+ OS << '\'' << Name << '\'';
+ }
+ DiagnosticInfoUnsupported BadTag(Fn, Str, MI.getDebugLoc(), DS_Warning);
+ Fn.getContext().diagnose(BadTag);
+}
+
+/// Reads \p MI's MMRAs to parse the "amdgpu-as" MMRA.
+/// If this tag isn't present, or if it has no meaningful values, returns \p
/// Default. Otherwise returns all the address spaces concerned by the MMRA.
-static SIAtomicAddrSpace
-getOpenCLFenceAddrSpaceMMRA(const MachineInstr &MI, SIAtomicAddrSpace Default) {
- static constexpr const char *Prefix = "opencl-fence-mem";
+static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI,
+ SIAtomicAddrSpace Default) {
+ static constexpr StringLiteral FenceASPrefix = "amdgpu-as";
auto MMRA = MMRAMetadata(MI.getMMRAMetadata());
if (!MMRA)
return Default;
- SIAtomicAddrSpace AS = SIAtomicAddrSpace::NONE;
- if (MMRA.hasTag(Prefix, "global"))
- AS |= SIAtomicAddrSpace::GLOBAL;
- if (MMRA.hasTag(Prefix, "local"))
- AS |= SIAtomicAddrSpace::LDS;
- if (MMRA.hasTag(Prefix, "image"))
- AS |= SIAtomicAddrSpace::SCRATCH;
+ SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
+ for (const auto &[Prefix, Suffix] : MMRA) {
+ if (Prefix != FenceASPrefix)
+ continue;
+
+ auto It = find_if(ASNames, [Suffix = Suffix](auto &Pair) {
+ return Pair.first == Suffix;
+ });
+
+ if (It != ASNames.end())
+ Result |= It->second;
+ else
+ diagnoseUnknownMMRAASName(MI, Suffix);
+ }
- return (AS != SIAtomicAddrSpace::NONE) ? AS : Default;
+ return (Result != SIAtomicAddrSpace::NONE) ? Result : Default;
}
} // end namespace anonymous
@@ -2561,10 +2592,8 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
// Refine fenced address space based on MMRAs.
//
// TODO: Should we support this MMRA on other atomic operations?
- // Frontend doesn't directly emit those. Can optimizations merge
- // an atomic load w/ a fence and give us, e.g. load acquire with this MMRA?
auto OrderingAddrSpace =
- getOpenCLFenceAddrSpaceMMRA(*MI, MOI.getOrderingAddrSpace());
+ getFenceAddrSpaceMMRA(*MI, MOI.getOrderingAddrSpace());
if (MOI.isAtomic()) {
if (MOI.getOrdering() == AtomicOrdering::Acquire)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
index 1ddfab7acc659..ed7b6f8805e27 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
@@ -60,7 +60,7 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -109,7 +109,7 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") release, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("workgroup") release, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -162,7 +162,7 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -215,7 +215,7 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -268,7 +268,7 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -317,7 +317,7 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") release, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -370,7 +370,7 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -423,7 +423,7 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -486,7 +486,7 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("agent") acquire, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -537,7 +537,7 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") release, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("agent") release, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -602,7 +602,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -667,7 +667,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -730,7 +730,7 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -781,7 +781,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") release, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -846,7 +846,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -911,7 +911,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -976,7 +976,7 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ fence acquire, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -1029,7 +1029,7 @@ define amdgpu_kernel void @system_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence release, !mmra !{!"opencl-fence-mem", !"global"}
+ fence release, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -1098,7 +1098,7 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ fence acq_rel, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -1167,7 +1167,7 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ fence seq_cst, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -1232,7 +1232,7 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") acquire, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("one-as") acquire, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -1285,7 +1285,7 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") release, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("one-as") release, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -1354,7 +1354,7 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-as", !"global"}
ret void
}
@@ -1423,6 +1423,6 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"global"}
+ fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-as", !"global"}
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
index b02b53342e9e8..d9753212e88d3 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
@@ -56,7 +56,7 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -105,7 +105,7 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") release, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("workgroup") release, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -154,7 +154,7 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -203,7 +203,7 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -252,7 +252,7 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -301,7 +301,7 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") release, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -350,7 +350,7 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -399,7 +399,7 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -448,7 +448,7 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("agent") acquire, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -497,7 +497,7 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") release, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("agent") release, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -546,7 +546,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -595,7 +595,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -644,7 +644,7 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -693,7 +693,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") release, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -742,7 +742,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -791,7 +791,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -840,7 +840,7 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ fence acquire, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -889,7 +889,7 @@ define amdgpu_kernel void @system_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence release, !mmra !{!"opencl-fence-mem", !"local"}
+ fence release, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -938,7 +938,7 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ fence acq_rel, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -987,7 +987,7 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ fence seq_cst, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -1036,7 +1036,7 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") acquire, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("one-as") acquire, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -1085,7 +1085,7 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") release, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("one-as") release, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -1134,7 +1134,7 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-as", !"local"}
ret void
}
@@ -1183,6 +1183,6 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"local"}
+ fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-as", !"local"}
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll
index 0e3c258f52c00..4c69b019eb934 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll
@@ -56,7 +56,7 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -105,7 +105,7 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") release, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("workgroup") release, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -154,7 +154,7 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -203,7 +203,7 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup") seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -252,7 +252,7 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -301,7 +301,7 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") release, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -350,7 +350,7 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -399,7 +399,7 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -448,7 +448,7 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("agent") acquire, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -497,7 +497,7 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") release, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("agent") release, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -546,7 +546,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -595,7 +595,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent") seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -644,7 +644,7 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -693,7 +693,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") release, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -742,7 +742,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -791,7 +791,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("agent-one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -840,7 +840,7 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ fence acquire, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -889,7 +889,7 @@ define amdgpu_kernel void @system_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence release, !mmra !{!"opencl-fence-mem", !"image"}
+ fence release, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -938,7 +938,7 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ fence acq_rel, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -987,7 +987,7 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ fence seq_cst, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -1036,7 +1036,7 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") acquire, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("one-as") acquire, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -1085,7 +1085,7 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") release, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("one-as") release, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -1134,7 +1134,7 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") acq_rel, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-as", !"image"}
ret void
}
@@ -1183,6 +1183,6 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
entry:
- fence syncscope("one-as") seq_cst, !mmra !{!"opencl-fence-mem", !"image"}
+ fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-as", !"image"}
ret void
}
>From c106a8cb3ad429897106aa9835d0f931e1393174 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Fri, 3 May 2024 10:20:29 +0200
Subject: [PATCH 6/9] Don't check AS names in frontend
---
clang/lib/CodeGen/CGBuiltin.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e847a30ed4bbc..3ae0dddbf8efd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18423,14 +18423,13 @@ void CodeGenFunction::AddAMDGCNFenceAddressSpaceMMRA(llvm::Instruction *Inst,
for (unsigned K = 2; K < E->getNumArgs(); ++K) {
llvm::Value *V = EmitScalarExpr(E->getArg(K));
StringRef AS;
- if (llvm::getConstantStringInfo(V, AS) &&
- (AS == "local" || AS == "global" || AS == "image")) {
+ if (llvm::getConstantStringInfo(V, AS)) {
MMRAs.push_back({Tag, AS});
// TODO: Delete the resulting unused constant?
continue;
}
CGM.Error(E->getExprLoc(),
- "expected one of \"local\", \"global\" or \"image\"");
+ "expected an address space name as a string literal");
}
llvm::unique(MMRAs);
>From 2494ad3ed67665252b920d8dd012fe6929f1308e Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 13 May 2024 10:03:07 +0200
Subject: [PATCH 7/9] correctly use unique + comments
---
clang/docs/LanguageExtensions.rst | 3 ++-
clang/lib/CodeGen/CGBuiltin.cpp | 7 ++++---
clang/lib/CodeGen/CodeGenFunction.h | 2 +-
clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp | 6 +++---
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 12 ++++--------
5 files changed, 14 insertions(+), 16 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 8c54452ab0355..2ab93887ddcba 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4432,7 +4432,8 @@ The address spaces arguments must be string literals with known values, such as:
* ``"image"``
If one or more address space name are provided, the code generator will attempt
-to emit potentially faster instructions that only fence those address spaces.
+to emit potentially faster instructions that order access to at least those
+address spaces.
Emitting such instructions may not always be possible and the compiler is free
to fence more aggressively.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3ae0dddbf8efd..f8555d86e5044 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18414,7 +18414,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return nullptr;
}
-void CodeGenFunction::AddAMDGCNFenceAddressSpaceMMRA(llvm::Instruction *Inst,
+void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
const CallExpr *E) {
constexpr const char *Tag = "amdgpu-as";
@@ -18432,7 +18432,8 @@ void CodeGenFunction::AddAMDGCNFenceAddressSpaceMMRA(llvm::Instruction *Inst,
"expected an address space name as a string literal");
}
- llvm::unique(MMRAs);
+ llvm::sort(MMRAs);
+ MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}
@@ -19108,7 +19109,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
EmitScalarExpr(E->getArg(1)), AO, SSID);
FenceInst *Fence = Builder.CreateFence(AO, SSID);
if (E->getNumArgs() > 2)
- AddAMDGCNFenceAddressSpaceMMRA(Fence, E);
+ AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
return Fence;
}
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 1a6610146b228..3f3fdeceedf16 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4632,7 +4632,7 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
- void AddAMDGCNFenceAddressSpaceMMRA(llvm::Instruction *Inst,
+ void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
const CallExpr *E);
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
llvm::AtomicOrdering &AO,
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
index 6697ad65d5c6e..eae831e7c8431 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -102,12 +102,12 @@ void test_image() {
//
void test_mixed() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "global");
- __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "local", "global");
+ __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "local", "global", "image", "image");
}
//.
// CHECK: [[META3]] = !{!"amdgpu-as", !"local"}
// CHECK: [[META4]] = !{!"amdgpu-as", !"global"}
// CHECK: [[META5]] = !{!"amdgpu-as", !"image"}
-// CHECK: [[META6]] = !{[[META5]], [[META4]]}
-// CHECK: [[META7]] = !{[[META5]], [[META3]], [[META4]]}
+// CHECK: [[META6]] = !{[[META4]], [[META5]]}
+// CHECK: [[META7]] = !{[[META4]], [[META5]], [[META3]]}
//.
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 0aafeb50297b6..bed1801fc9303 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -18,6 +18,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -691,14 +692,9 @@ void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
std::string Str;
raw_string_ostream OS(Str);
OS << "unknown address space '" << AS << "'; expected one of ";
- bool IsFirst = true;
- for (const auto &[Name, Val] : ASNames) {
- if (IsFirst)
- IsFirst = false;
- else
- OS << ", ";
- OS << '\'' << Name << '\'';
- }
+ ListSeparator LS;
+ for (const auto &[Name, Val] : ASNames)
+ OS << LS << '\'' << Name << '\'';
DiagnosticInfoUnsupported BadTag(Fn, Str, MI.getDebugLoc(), DS_Warning);
Fn.getContext().diagnose(BadTag);
}
>From 8f8ae2a57888b86c1841ba5049b22bbf47aba505 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 14 May 2024 08:35:30 +0200
Subject: [PATCH 8/9] update test, use stringmap
---
clang/test/SemaOpenCL/builtins-amdgcn-error.cl | 4 ++--
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 8 ++------
2 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
index b044763edcf00..7a550f026bc1b 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -155,8 +155,8 @@ void test_ds_fmaxf(local float *out, float src, int a) {
void test_fence() {
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
- __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
- __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
+ __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected at least 2, have 1}}
+ __builtin_amdgcn_fence(4, 4, 4); // expected-error {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
__builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, 5); // expected-error {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
const char ptr[] = "workgroup";
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index bed1801fc9303..bd99fddbbe5e2 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -680,7 +680,7 @@ class SIMemoryLegalizer final : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
};
-static std::array<std::pair<StringLiteral, SIAtomicAddrSpace>, 3> ASNames = {{
+static const StringMap<SIAtomicAddrSpace> ASNames = {{
{"global", SIAtomicAddrSpace::GLOBAL},
{"local", SIAtomicAddrSpace::LDS},
{"image", SIAtomicAddrSpace::SCRATCH},
@@ -715,11 +715,7 @@ static SIAtomicAddrSpace getFenceAddrSpaceMMRA(const MachineInstr &MI,
if (Prefix != FenceASPrefix)
continue;
- auto It = find_if(ASNames, [Suffix = Suffix](auto &Pair) {
- return Pair.first == Suffix;
- });
-
- if (It != ASNames.end())
+ if (auto It = ASNames.find(Suffix); It != ASNames.end())
Result |= It->second;
else
diagnoseUnknownMMRAASName(MI, Suffix);
>From 1f29733e29c78081e83904e1ca330f1a92d596ec Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 22 May 2024 10:45:50 +0200
Subject: [PATCH 9/9] Remove "private", use O0 in tests
---
clang/docs/LanguageExtensions.rst | 3 +-
.../test/CodeGenCXX/builtin-amdgcn-fence.cpp | 32 +-
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 1 -
.../memory-legalizer-fence-mmra-global.ll | 310 ++++-
.../memory-legalizer-fence-mmra-local.ll | 130 +-
.../memory-legalizer-fence-mmra-private.ll | 1188 -----------------
6 files changed, 434 insertions(+), 1230 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 2ab93887ddcba..46f99d0bbdd06 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4425,11 +4425,10 @@ __builtin_amdgcn_fence
* ``const char *`` synchronization scope, e.g. ``workgroup``
* Zero or more ``const char *`` address spaces names.
-The address spaces arguments must be string literals with known values, such as:
+The address spaces arguments must be one of the following string literals:
* ``"local"``
* ``"global"``
-* ``"image"``
If one or more address space name are provided, the code generator will attempt
to emit potentially faster instructions that order access to at least those
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
index eae831e7c8431..3af5a21ba0cd5 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -74,40 +74,38 @@ void test_global() {
// CHECK-LABEL: define dso_local void @_Z10test_imagev(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
-// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META5]]
-// CHECK-NEXT: fence seq_cst, !mmra [[META5]]
-// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META5]]
-// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META5]]
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]]
+// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// CHECK-NEXT: fence seq_cst, !mmra [[META3]]
+// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
// CHECK-NEXT: ret void
//
void test_image() {
- __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image");
+ __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
- __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "image");
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "local");
- __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "image");
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "local");
- __builtin_amdgcn_fence(4, "agent", "image");
+ __builtin_amdgcn_fence(4, "agent", "local");
- __builtin_amdgcn_fence(3, "workgroup", "image");
+ __builtin_amdgcn_fence(3, "workgroup", "local");
}
// CHECK-LABEL: define dso_local void @_Z10test_mixedv(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META6:![0-9]+]]
-// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META7:![0-9]+]]
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
// CHECK-NEXT: ret void
//
void test_mixed() {
- __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "global");
- __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "image", "local", "global", "image", "image");
+ __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global");
+ __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local");
}
//.
// CHECK: [[META3]] = !{!"amdgpu-as", !"local"}
// CHECK: [[META4]] = !{!"amdgpu-as", !"global"}
-// CHECK: [[META5]] = !{!"amdgpu-as", !"image"}
-// CHECK: [[META6]] = !{[[META4]], [[META5]]}
-// CHECK: [[META7]] = !{[[META4]], [[META5]], [[META3]]}
+// CHECK: [[META5]] = !{[[META4]], [[META3]]}
//.
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index bd99fddbbe5e2..ea7d021bc1831 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -683,7 +683,6 @@ class SIMemoryLegalizer final : public MachineFunctionPass {
static const StringMap<SIAtomicAddrSpace> ASNames = {{
{"global", SIAtomicAddrSpace::GLOBAL},
{"local", SIAtomicAddrSpace::LDS},
- {"image", SIAtomicAddrSpace::SCRATCH},
}};
void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
index ed7b6f8805e27..da9bc6b331134 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX6-LABEL: workgroup_acquire_fence:
@@ -22,6 +22,8 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
;
; GFX10-WGP-LABEL: workgroup_acquire_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@@ -39,6 +41,7 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
;
; GFX90A-TGSPLIT-LABEL: workgroup_acquire_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
@@ -48,11 +51,14 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
;
; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: workgroup_acquire_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
@@ -75,6 +81,8 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX10-WGP-LABEL: workgroup_release_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: workgroup_release_fence:
@@ -91,6 +99,7 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX90A-TGSPLIT-LABEL: workgroup_release_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence:
@@ -99,10 +108,13 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX940-TGSPLIT-LABEL: workgroup_release_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: workgroup_release_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: workgroup_release_fence:
@@ -124,6 +136,8 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX10-WGP-LABEL: workgroup_acq_rel_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@@ -141,6 +155,7 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX90A-TGSPLIT-LABEL: workgroup_acq_rel_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
@@ -150,11 +165,14 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: workgroup_acq_rel_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
@@ -177,6 +195,8 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX10-WGP-LABEL: workgroup_seq_cst_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@@ -194,6 +214,7 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX90A-TGSPLIT-LABEL: workgroup_seq_cst_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
@@ -203,11 +224,14 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: workgroup_seq_cst_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
@@ -230,6 +254,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
;
; GFX10-WGP-LABEL: workgroup_one_as_acquire_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@@ -247,6 +273,7 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
;
; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acquire_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
@@ -256,11 +283,14 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
;
; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
@@ -283,6 +313,8 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
;
; GFX10-WGP-LABEL: workgroup_one_as_release_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: workgroup_one_as_release_fence:
@@ -299,6 +331,7 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
;
; GFX90A-TGSPLIT-LABEL: workgroup_one_as_release_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence:
@@ -307,10 +340,13 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
;
; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: workgroup_one_as_release_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: workgroup_one_as_release_fence:
@@ -332,6 +368,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
;
; GFX10-WGP-LABEL: workgroup_one_as_acq_rel_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@@ -349,6 +387,7 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
;
; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
@@ -358,11 +397,14 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
;
; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
@@ -385,6 +427,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
;
; GFX10-WGP-LABEL: workgroup_one_as_seq_cst_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
@@ -402,6 +446,7 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
;
; GFX90A-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
@@ -411,11 +456,14 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
;
; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
@@ -430,58 +478,73 @@ entry:
define amdgpu_kernel void @agent_acquire_fence() {
; GFX6-LABEL: agent_acquire_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_acquire_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_acquire_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_acquire_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_acquire_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_acquire_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_acquire_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_acquire_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: agent_acquire_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -493,48 +556,63 @@ entry:
define amdgpu_kernel void @agent_release_fence() {
; GFX6-LABEL: agent_release_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_release_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_release_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_release_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_release_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_release_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_release_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: agent_release_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_release_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: agent_release_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-as", !"global"}
@@ -544,60 +622,75 @@ entry:
define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX6-LABEL: agent_acq_rel_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_acq_rel_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_acq_rel_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_acq_rel_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_acq_rel_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_acq_rel_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: agent_acq_rel_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -609,60 +702,75 @@ entry:
define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX6-LABEL: agent_seq_cst_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_seq_cst_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_seq_cst_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_seq_cst_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_seq_cst_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_seq_cst_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: agent_seq_cst_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -674,58 +782,73 @@ entry:
define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX6-LABEL: agent_one_as_acquire_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_one_as_acquire_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_one_as_acquire_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_one_as_acquire_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_one_as_acquire_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_one_as_acquire_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: agent_one_as_acquire_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -737,48 +860,63 @@ entry:
define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX6-LABEL: agent_one_as_release_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_one_as_release_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_one_as_release_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_one_as_release_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_one_as_release_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_release_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_one_as_release_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: agent_one_as_release_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-as", !"global"}
@@ -788,60 +926,75 @@ entry:
define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX6-LABEL: agent_one_as_acq_rel_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_one_as_acq_rel_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_one_as_acq_rel_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_one_as_acq_rel_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_one_as_acq_rel_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_one_as_acq_rel_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -853,60 +1006,75 @@ entry:
define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX6-LABEL: agent_one_as_seq_cst_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_one_as_seq_cst_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_one_as_seq_cst_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_one_as_seq_cst_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_one_as_seq_cst_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_one_as_seq_cst_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_one_as_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -918,60 +1086,75 @@ entry:
define amdgpu_kernel void @system_acquire_fence() {
; GFX6-LABEL: system_acquire_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_acquire_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_acquire_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_acquire_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_acquire_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: system_acquire_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_acquire_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: system_acquire_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -983,50 +1166,65 @@ entry:
define amdgpu_kernel void @system_release_fence() {
; GFX6-LABEL: system_release_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_release_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_release_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_release_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_release_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_release_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: system_release_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: system_release_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_release_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: system_release_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-as", !"global"}
@@ -1036,33 +1234,41 @@ entry:
define amdgpu_kernel void @system_acq_rel_fence() {
; GFX6-LABEL: system_acq_rel_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_acq_rel_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_acq_rel_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_acq_rel_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_acq_rel_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_acq_rel_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
@@ -1070,6 +1276,7 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX90A-TGSPLIT-LABEL: system_acq_rel_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
@@ -1077,23 +1284,29 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_acq_rel_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: system_acq_rel_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -1105,33 +1318,41 @@ entry:
define amdgpu_kernel void @system_seq_cst_fence() {
; GFX6-LABEL: system_seq_cst_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_seq_cst_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_seq_cst_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_seq_cst_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_seq_cst_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_seq_cst_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
@@ -1139,6 +1360,7 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX90A-TGSPLIT-LABEL: system_seq_cst_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
@@ -1146,23 +1368,29 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_seq_cst_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: system_seq_cst_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -1174,60 +1402,75 @@ entry:
define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX6-LABEL: system_one_as_acquire_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_one_as_acquire_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_one_as_acquire_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_one_as_acquire_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_one_as_acquire_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: system_one_as_acquire_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -1239,50 +1482,65 @@ entry:
define amdgpu_kernel void @system_one_as_release_fence() {
; GFX6-LABEL: system_one_as_release_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_one_as_release_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_one_as_release_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_one_as_release_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_one_as_release_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_one_as_release_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: system_one_as_release_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: s_endpgm
;
; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_one_as_release_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: system_one_as_release_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("one-as") release, !mmra !{!"amdgpu-as", !"global"}
@@ -1292,33 +1550,41 @@ entry:
define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX6-LABEL: system_one_as_acq_rel_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_one_as_acq_rel_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_one_as_acq_rel_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_one_as_acq_rel_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_one_as_acq_rel_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
@@ -1326,6 +1592,7 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX90A-TGSPLIT-LABEL: system_one_as_acq_rel_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
@@ -1333,23 +1600,29 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: system_one_as_acq_rel_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
@@ -1361,33 +1634,41 @@ entry:
define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX6-LABEL: system_one_as_seq_cst_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_wbinvl1
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_one_as_seq_cst_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_wbinvl1_vol
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_one_as_seq_cst_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: buffer_gl1_inv
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_one_as_seq_cst_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: buffer_gl1_inv
; GFX10-CU-NEXT: buffer_gl0_inv
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_one_as_seq_cst_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbl2
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: buffer_invl2
; GFX90A-NOTTGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
@@ -1395,6 +1676,7 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX90A-TGSPLIT-LABEL: system_one_as_seq_cst_fence:
; GFX90A-TGSPLIT: ; %bb.0: ; %entry
; GFX90A-TGSPLIT-NEXT: buffer_wbl2
+; GFX90A-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX90A-TGSPLIT-NEXT: buffer_invl2
; GFX90A-TGSPLIT-NEXT: buffer_wbinvl1_vol
; GFX90A-TGSPLIT-NEXT: s_endpgm
@@ -1402,23 +1684,29 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence:
; GFX940-TGSPLIT: ; %bb.0: ; %entry
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
+; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
; GFX940-TGSPLIT-NEXT: s_endpgm
;
; GFX11-WGP-LABEL: system_one_as_seq_cst_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
+; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-WGP-NEXT: buffer_gl1_inv
; GFX11-WGP-NEXT: buffer_gl0_inv
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_one_as_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: buffer_gl1_inv
; GFX11-CU-NEXT: buffer_gl0_inv
; GFX11-CU-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
index d9753212e88d3..601a6a60fe7b4 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
@@ -1,39 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX6-LABEL: workgroup_acquire_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: workgroup_acquire_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: workgroup_acquire_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: workgroup_acquire_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: workgroup_acquire_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: workgroup_acquire_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: workgroup_acquire_fence:
@@ -42,6 +48,7 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence:
@@ -50,10 +57,12 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
;
; GFX11-WGP-LABEL: workgroup_acquire_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: workgroup_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-as", !"local"}
@@ -63,26 +72,32 @@ entry:
define amdgpu_kernel void @workgroup_release_fence() {
; GFX6-LABEL: workgroup_release_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: workgroup_release_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: workgroup_release_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: workgroup_release_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: workgroup_release_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: workgroup_release_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: workgroup_release_fence:
@@ -91,6 +106,7 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: workgroup_release_fence:
@@ -99,10 +115,12 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX11-WGP-LABEL: workgroup_release_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: workgroup_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-as", !"local"}
@@ -112,26 +130,32 @@ entry:
define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX6-LABEL: workgroup_acq_rel_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: workgroup_acq_rel_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: workgroup_acq_rel_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: workgroup_acq_rel_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: workgroup_acq_rel_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: workgroup_acq_rel_fence:
@@ -140,6 +164,7 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence:
@@ -148,10 +173,12 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX11-WGP-LABEL: workgroup_acq_rel_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: workgroup_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-as", !"local"}
@@ -161,26 +188,32 @@ entry:
define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX6-LABEL: workgroup_seq_cst_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: workgroup_seq_cst_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: workgroup_seq_cst_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: workgroup_seq_cst_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: workgroup_seq_cst_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: workgroup_seq_cst_fence:
@@ -189,6 +222,7 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence:
@@ -197,10 +231,12 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX11-WGP-LABEL: workgroup_seq_cst_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: workgroup_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-as", !"local"}
@@ -406,26 +442,32 @@ entry:
define amdgpu_kernel void @agent_acquire_fence() {
; GFX6-LABEL: agent_acquire_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_acquire_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_acquire_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_acquire_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_acquire_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_acquire_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_acquire_fence:
@@ -434,6 +476,7 @@ define amdgpu_kernel void @agent_acquire_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_acquire_fence:
@@ -442,10 +485,12 @@ define amdgpu_kernel void @agent_acquire_fence() {
;
; GFX11-WGP-LABEL: agent_acquire_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire, !mmra !{!"amdgpu-as", !"local"}
@@ -455,26 +500,32 @@ entry:
define amdgpu_kernel void @agent_release_fence() {
; GFX6-LABEL: agent_release_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_release_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_release_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_release_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_release_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_release_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_release_fence:
@@ -483,6 +534,7 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: agent_release_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_release_fence:
@@ -491,10 +543,12 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX11-WGP-LABEL: agent_release_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-as", !"local"}
@@ -504,26 +558,32 @@ entry:
define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX6-LABEL: agent_acq_rel_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_acq_rel_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_acq_rel_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_acq_rel_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_acq_rel_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_acq_rel_fence:
@@ -532,6 +592,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence:
@@ -540,10 +601,12 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX11-WGP-LABEL: agent_acq_rel_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-as", !"local"}
@@ -553,26 +616,32 @@ entry:
define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX6-LABEL: agent_seq_cst_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: agent_seq_cst_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: agent_seq_cst_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: agent_seq_cst_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: agent_seq_cst_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: agent_seq_cst_fence:
@@ -581,6 +650,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence:
@@ -589,10 +659,12 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX11-WGP-LABEL: agent_seq_cst_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: agent_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-as", !"local"}
@@ -798,26 +870,32 @@ entry:
define amdgpu_kernel void @system_acquire_fence() {
; GFX6-LABEL: system_acquire_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_acquire_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_acquire_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_acquire_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_acquire_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: system_acquire_fence:
@@ -826,6 +904,7 @@ define amdgpu_kernel void @system_acquire_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_acquire_fence:
@@ -834,10 +913,12 @@ define amdgpu_kernel void @system_acquire_fence() {
;
; GFX11-WGP-LABEL: system_acquire_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence acquire, !mmra !{!"amdgpu-as", !"local"}
@@ -847,26 +928,32 @@ entry:
define amdgpu_kernel void @system_release_fence() {
; GFX6-LABEL: system_release_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_release_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_release_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_release_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_release_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_release_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: system_release_fence:
@@ -875,6 +962,7 @@ define amdgpu_kernel void @system_release_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: system_release_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_release_fence:
@@ -883,10 +971,12 @@ define amdgpu_kernel void @system_release_fence() {
;
; GFX11-WGP-LABEL: system_release_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-as", !"local"}
@@ -896,26 +986,32 @@ entry:
define amdgpu_kernel void @system_acq_rel_fence() {
; GFX6-LABEL: system_acq_rel_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_acq_rel_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_acq_rel_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_acq_rel_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_acq_rel_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_acq_rel_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: system_acq_rel_fence:
@@ -924,6 +1020,7 @@ define amdgpu_kernel void @system_acq_rel_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_acq_rel_fence:
@@ -932,10 +1029,12 @@ define amdgpu_kernel void @system_acq_rel_fence() {
;
; GFX11-WGP-LABEL: system_acq_rel_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence acq_rel, !mmra !{!"amdgpu-as", !"local"}
@@ -945,26 +1044,32 @@ entry:
define amdgpu_kernel void @system_seq_cst_fence() {
; GFX6-LABEL: system_seq_cst_fence:
; GFX6: ; %bb.0: ; %entry
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: system_seq_cst_fence:
; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_endpgm
;
; GFX10-WGP-LABEL: system_seq_cst_fence:
; GFX10-WGP: ; %bb.0: ; %entry
+; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-CU-LABEL: system_seq_cst_fence:
; GFX10-CU: ; %bb.0: ; %entry
+; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: s_endpgm
;
; SKIP-CACHE-INV-LABEL: system_seq_cst_fence:
; SKIP-CACHE-INV: ; %bb.0: ; %entry
+; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
; SKIP-CACHE-INV-NEXT: s_endpgm
;
; GFX90A-NOTTGSPLIT-LABEL: system_seq_cst_fence:
; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX90A-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX90A-TGSPLIT-LABEL: system_seq_cst_fence:
@@ -973,6 +1078,7 @@ define amdgpu_kernel void @system_seq_cst_fence() {
;
; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence:
; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
+; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NOTTGSPLIT-NEXT: s_endpgm
;
; GFX940-TGSPLIT-LABEL: system_seq_cst_fence:
@@ -981,10 +1087,12 @@ define amdgpu_kernel void @system_seq_cst_fence() {
;
; GFX11-WGP-LABEL: system_seq_cst_fence:
; GFX11-WGP: ; %bb.0: ; %entry
+; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: s_endpgm
;
; GFX11-CU-LABEL: system_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
+; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: s_endpgm
entry:
fence seq_cst, !mmra !{!"amdgpu-as", !"local"}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll
deleted file mode 100644
index 4c69b019eb934..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-private.ll
+++ /dev/null
@@ -1,1188 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
-
-define amdgpu_kernel void @workgroup_acquire_fence() {
-; GFX6-LABEL: workgroup_acquire_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: workgroup_acquire_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: workgroup_acquire_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: workgroup_acquire_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: workgroup_acquire_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: workgroup_acquire_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: workgroup_acquire_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: workgroup_acquire_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: workgroup_acquire_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: workgroup_acquire_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: workgroup_acquire_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @workgroup_release_fence() {
-; GFX6-LABEL: workgroup_release_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: workgroup_release_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: workgroup_release_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: workgroup_release_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: workgroup_release_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: workgroup_release_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: workgroup_release_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: workgroup_release_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: workgroup_release_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: workgroup_release_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: workgroup_release_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("workgroup") release, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @workgroup_acq_rel_fence() {
-; GFX6-LABEL: workgroup_acq_rel_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: workgroup_acq_rel_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: workgroup_acq_rel_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: workgroup_acq_rel_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: workgroup_acq_rel_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: workgroup_acq_rel_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: workgroup_acq_rel_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: workgroup_acq_rel_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: workgroup_acq_rel_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: workgroup_acq_rel_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @workgroup_seq_cst_fence() {
-; GFX6-LABEL: workgroup_seq_cst_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: workgroup_seq_cst_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: workgroup_seq_cst_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: workgroup_seq_cst_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: workgroup_seq_cst_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: workgroup_seq_cst_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: workgroup_seq_cst_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: workgroup_seq_cst_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: workgroup_seq_cst_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: workgroup_seq_cst_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
-; GFX6-LABEL: workgroup_one_as_acquire_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: workgroup_one_as_acquire_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: workgroup_one_as_acquire_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: workgroup_one_as_acquire_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: workgroup_one_as_acquire_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acquire_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acquire_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: workgroup_one_as_acquire_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: workgroup_one_as_acquire_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @workgroup_one_as_release_fence() {
-; GFX6-LABEL: workgroup_one_as_release_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: workgroup_one_as_release_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: workgroup_one_as_release_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: workgroup_one_as_release_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: workgroup_one_as_release_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: workgroup_one_as_release_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_release_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: workgroup_one_as_release_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: workgroup_one_as_release_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: workgroup_one_as_release_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
-; GFX6-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: workgroup_one_as_acq_rel_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
-; GFX6-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: workgroup_one_as_seq_cst_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @agent_acquire_fence() {
-; GFX6-LABEL: agent_acquire_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: agent_acquire_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: agent_acquire_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: agent_acquire_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: agent_acquire_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: agent_acquire_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: agent_acquire_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: agent_acquire_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: agent_acquire_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: agent_acquire_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: agent_acquire_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("agent") acquire, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @agent_release_fence() {
-; GFX6-LABEL: agent_release_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: agent_release_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: agent_release_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: agent_release_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: agent_release_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: agent_release_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: agent_release_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: agent_release_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: agent_release_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: agent_release_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: agent_release_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("agent") release, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @agent_acq_rel_fence() {
-; GFX6-LABEL: agent_acq_rel_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: agent_acq_rel_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: agent_acq_rel_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: agent_acq_rel_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: agent_acq_rel_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: agent_acq_rel_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: agent_acq_rel_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: agent_acq_rel_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: agent_acq_rel_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: agent_acq_rel_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @agent_seq_cst_fence() {
-; GFX6-LABEL: agent_seq_cst_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: agent_seq_cst_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: agent_seq_cst_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: agent_seq_cst_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: agent_seq_cst_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: agent_seq_cst_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: agent_seq_cst_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: agent_seq_cst_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: agent_seq_cst_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: agent_seq_cst_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @agent_one_as_acquire_fence() {
-; GFX6-LABEL: agent_one_as_acquire_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: agent_one_as_acquire_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: agent_one_as_acquire_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: agent_one_as_acquire_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: agent_one_as_acquire_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: agent_one_as_acquire_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acquire_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: agent_one_as_acquire_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: agent_one_as_acquire_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: agent_one_as_acquire_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @agent_one_as_release_fence() {
-; GFX6-LABEL: agent_one_as_release_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: agent_one_as_release_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: agent_one_as_release_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: agent_one_as_release_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: agent_one_as_release_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_release_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: agent_one_as_release_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: agent_one_as_release_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: agent_one_as_release_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: agent_one_as_release_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: agent_one_as_release_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
-; GFX6-LABEL: agent_one_as_acq_rel_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: agent_one_as_acq_rel_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: agent_one_as_acq_rel_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: agent_one_as_acq_rel_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: agent_one_as_acq_rel_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: agent_one_as_acq_rel_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: agent_one_as_acq_rel_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: agent_one_as_acq_rel_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: agent_one_as_acq_rel_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
-; GFX6-LABEL: agent_one_as_seq_cst_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: agent_one_as_seq_cst_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: agent_one_as_seq_cst_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: agent_one_as_seq_cst_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: agent_one_as_seq_cst_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: agent_one_as_seq_cst_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: agent_one_as_seq_cst_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: agent_one_as_seq_cst_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: agent_one_as_seq_cst_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @system_acquire_fence() {
-; GFX6-LABEL: system_acquire_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: system_acquire_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: system_acquire_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: system_acquire_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: system_acquire_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: system_acquire_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: system_acquire_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: system_acquire_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: system_acquire_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: system_acquire_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: system_acquire_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence acquire, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @system_release_fence() {
-; GFX6-LABEL: system_release_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: system_release_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: system_release_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: system_release_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: system_release_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: system_release_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: system_release_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: system_release_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: system_release_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: system_release_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: system_release_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence release, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @system_acq_rel_fence() {
-; GFX6-LABEL: system_acq_rel_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: system_acq_rel_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: system_acq_rel_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: system_acq_rel_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: system_acq_rel_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: system_acq_rel_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: system_acq_rel_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: system_acq_rel_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: system_acq_rel_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: system_acq_rel_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: system_acq_rel_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence acq_rel, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @system_seq_cst_fence() {
-; GFX6-LABEL: system_seq_cst_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: system_seq_cst_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: system_seq_cst_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: system_seq_cst_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: system_seq_cst_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: system_seq_cst_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: system_seq_cst_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: system_seq_cst_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: system_seq_cst_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: system_seq_cst_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: system_seq_cst_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence seq_cst, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @system_one_as_acquire_fence() {
-; GFX6-LABEL: system_one_as_acquire_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: system_one_as_acquire_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: system_one_as_acquire_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: system_one_as_acquire_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: system_one_as_acquire_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: system_one_as_acquire_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: system_one_as_acquire_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: system_one_as_acquire_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: system_one_as_acquire_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: system_one_as_acquire_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("one-as") acquire, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @system_one_as_release_fence() {
-; GFX6-LABEL: system_one_as_release_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: system_one_as_release_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: system_one_as_release_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: system_one_as_release_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: system_one_as_release_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: system_one_as_release_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: system_one_as_release_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: system_one_as_release_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: system_one_as_release_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: system_one_as_release_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: system_one_as_release_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("one-as") release, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @system_one_as_acq_rel_fence() {
-; GFX6-LABEL: system_one_as_acq_rel_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: system_one_as_acq_rel_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: system_one_as_acq_rel_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: system_one_as_acq_rel_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: system_one_as_acq_rel_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: system_one_as_acq_rel_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: system_one_as_acq_rel_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: system_one_as_acq_rel_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: system_one_as_acq_rel_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: system_one_as_acq_rel_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
-
-define amdgpu_kernel void @system_one_as_seq_cst_fence() {
-; GFX6-LABEL: system_one_as_seq_cst_fence:
-; GFX6: ; %bb.0: ; %entry
-; GFX6-NEXT: s_endpgm
-;
-; GFX7-LABEL: system_one_as_seq_cst_fence:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_endpgm
-;
-; GFX10-WGP-LABEL: system_one_as_seq_cst_fence:
-; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_endpgm
-;
-; GFX10-CU-LABEL: system_one_as_seq_cst_fence:
-; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_endpgm
-;
-; SKIP-CACHE-INV-LABEL: system_one_as_seq_cst_fence:
-; SKIP-CACHE-INV: ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT: s_endpgm
-;
-; GFX90A-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence:
-; GFX90A-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX90A-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX90A-TGSPLIT-LABEL: system_one_as_seq_cst_fence:
-; GFX90A-TGSPLIT: ; %bb.0: ; %entry
-; GFX90A-TGSPLIT-NEXT: s_endpgm
-;
-; GFX940-NOTTGSPLIT-LABEL: system_one_as_seq_cst_fence:
-; GFX940-NOTTGSPLIT: ; %bb.0: ; %entry
-; GFX940-NOTTGSPLIT-NEXT: s_endpgm
-;
-; GFX940-TGSPLIT-LABEL: system_one_as_seq_cst_fence:
-; GFX940-TGSPLIT: ; %bb.0: ; %entry
-; GFX940-TGSPLIT-NEXT: s_endpgm
-;
-; GFX11-WGP-LABEL: system_one_as_seq_cst_fence:
-; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_endpgm
-;
-; GFX11-CU-LABEL: system_one_as_seq_cst_fence:
-; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_endpgm
-entry:
- fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-as", !"image"}
- ret void
-}
More information about the llvm-commits
mailing list