[llvm] [AMDGPU] Handle amdgpu.last.use metadata (PR #83816)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 4 02:10:54 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko Brkušanin (mbrkusanin)
<details>
<summary>Changes</summary>
Convert !amdgpu.last.use metadata into a Machine Memory Operand flag for last
use, and handle it in SIMemoryLegalizer similarly to nontemporal and volatile.
---
Patch is 1.85 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83816.diff
28 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+5-2)
- (modified) llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (+58-8)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll (+352)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll (+2102)
- (added) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll (+66)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll (+267)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll (+1866)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll (+2102)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll (+156)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll (+1844)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll (+1918)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll (+2216)
- (added) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll (+65)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll (+256)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll (+2170)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll (+2128)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll (+151)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll (+2170)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll (+2206)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll (+1836)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll (+281)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll (+1770)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll (+1836)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll (+139)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll (+1770)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll (+1836)
- (added) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll (+70)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll (+203)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0df0b5cdf0f392..72c79cc58f8ea9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16181,9 +16181,12 @@ bool SITargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
MachineMemOperand::Flags
SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
// Propagate metadata set by AMDGPUAnnotateUniformValues to the MMO of a load.
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
if (I.getMetadata("amdgpu.noclobber"))
- return MONoClobber;
- return MachineMemOperand::MONone;
+ Flags |= MONoClobber;
+ if (I.getMetadata("amdgpu.last.use"))
+ Flags |= MOLastUse;
+ return Flags;
}
bool SITargetLowering::checkForPhysRegDependency(
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 4069a368f68719..ff3e6bc9f8b08f 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -98,6 +98,7 @@ class SIMemOpInfo final {
bool IsCrossAddressSpaceOrdering = false;
bool IsVolatile = false;
bool IsNonTemporal = false;
+ bool IsLastUse = false;
SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
SIAtomicScope Scope = SIAtomicScope::SYSTEM,
@@ -107,13 +108,15 @@ class SIMemOpInfo final {
AtomicOrdering FailureOrdering =
AtomicOrdering::SequentiallyConsistent,
bool IsVolatile = false,
- bool IsNonTemporal = false)
+ bool IsNonTemporal = false,
+ bool IsLastUse = false)
: Ordering(Ordering), FailureOrdering(FailureOrdering),
Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
InstrAddrSpace(InstrAddrSpace),
IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
IsVolatile(IsVolatile),
- IsNonTemporal(IsNonTemporal) {
+ IsNonTemporal(IsNonTemporal),
+ IsLastUse(IsLastUse) {
if (Ordering == AtomicOrdering::NotAtomic) {
assert(Scope == SIAtomicScope::NONE &&
@@ -201,6 +204,12 @@ class SIMemOpInfo final {
return IsNonTemporal;
}
+ /// \returns True if memory access of the machine instruction used to
+ /// create this SIMemOpInfo is last use, false otherwise.
+ bool isLastUse() const {
+ return IsLastUse;
+ }
+
/// \returns True if ordering constraint of the machine instruction used to
/// create this SIMemOpInfo is unordered or higher, false otherwise.
bool isAtomic() const {
@@ -316,6 +325,12 @@ class SICacheControl {
return false;
};
+ /// Update \p MI memory instruction to indicate it is a last use. Return true
+ /// iff the instruction was modified.
+ virtual bool enableLastUse(MachineInstr &MI, bool IsLastUse) const {
+ return false;
+ }
+
/// Inserts any necessary instructions at position \p Pos relative
/// to instruction \p MI to ensure memory instructions before \p Pos of kind
/// \p Op associated with address spaces \p AddrSpace have completed. Used
@@ -592,6 +607,10 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
// MI. \returns Returns true if \p MI is modified, false otherwise.
bool setScope(const MachineBasicBlock::iterator MI,
AMDGPU::CPol::CPol Value) const;
+ // Checks if CPol operand is present in instruction \p MI and if current Scope
+ // policy is same as \p Value.
+ bool isScope(const MachineBasicBlock::iterator MI,
+ AMDGPU::CPol::CPol Value) const;
// Stores with system scope (SCOPE_SYS) need to wait for:
// - loads or atomics(returning) - wait for {LOAD|SAMPLE|BVH|KM}CNT==0
@@ -618,6 +637,9 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
bool IsNonTemporal) const override;
bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const override;
+
+ bool enableLastUse(MachineInstr &MI,
+ bool IsLastUse) const override;
};
class SIMemoryLegalizer final : public MachineFunctionPass {
@@ -745,12 +767,14 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
bool IsNonTemporal = true;
bool IsVolatile = false;
+ bool IsLastUse = false;
// Validator should check whether or not MMOs cover the entire set of
// locations accessed by the memory instruction.
for (const auto &MMO : MI->memoperands()) {
IsNonTemporal &= MMO->isNonTemporal();
IsVolatile |= MMO->isVolatile();
+ IsLastUse |= MMO->getFlags() & MOLastUse;
InstrAddrSpace |=
toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
@@ -792,7 +816,7 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
}
return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
- IsNonTemporal);
+ IsNonTemporal, IsLastUse);
}
std::optional<SIMemOpInfo>
@@ -2209,6 +2233,15 @@ bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
return false;
}
+bool SIGfx12CacheControl::isScope(const MachineBasicBlock::iterator MI,
+ AMDGPU::CPol::CPol Value) const {
+ MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
+ if (!CPol)
+ return false;
+
+ return (CPol->getImm() & AMDGPU::CPol::SCOPE) == Value;
+}
+
bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
const MachineBasicBlock::iterator MI) const {
// TODO: implement flag for frontend to give us a hint not to insert waits.
@@ -2392,6 +2425,11 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
bool Changed = false;
+ if (IsNonTemporal) {
+ // Set non-temporal hint for all cache levels.
+ Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
+ }
+
if (IsVolatile) {
Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
@@ -2407,14 +2445,19 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
Position::AFTER);
}
- if (IsNonTemporal) {
- // Set non-temporal hint for all cache levels.
- Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
- }
-
return Changed;
}
+bool SIGfx12CacheControl::enableLastUse(MachineInstr &MI,
+ bool IsLastUse) const {
+ assert(MI.mayLoad() && !MI.mayStore());
+
+ if (IsLastUse && !isScope(MI, AMDGPU::CPol::SCOPE_SYS))
+ return setTH(MI, AMDGPU::CPol::TH_LU);;
+
+ return false;
+}
+
bool SIGfx12CacheControl::expandSystemScopeStore(
MachineBasicBlock::iterator &MI) const {
MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
@@ -2471,12 +2514,19 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
return Changed;
}
+ // enableVolatileAndOrNonTemporal can insert instructions and advance iterator
+ // MI and we need original instruction for enabling last use.
+ MachineInstr &Inst = *MI;
+
// Atomic instructions already bypass caches to the scope specified by the
// SyncScope operand. Only non-atomic volatile and nontemporal instructions
// need additional treatment.
Changed |= CC->enableVolatileAndOrNonTemporal(MI, MOI.getInstrAddrSpace(),
SIMemOp::LOAD, MOI.isVolatile(),
MOI.isNonTemporal());
+
+ Changed |= CC->enableLastUse(Inst, MOI.isLastUse());
+
return Changed;
}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
index 77962fadcacfc6..e13542f61474e2 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
@@ -10,6 +10,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX6-LABEL: singlethread_acquire_fence:
@@ -55,6 +57,14 @@ define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX11-CU-LABEL: singlethread_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acquire
ret void
@@ -104,6 +114,14 @@ define amdgpu_kernel void @singlethread_release_fence() {
; GFX11-CU-LABEL: singlethread_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") release
ret void
@@ -153,6 +171,14 @@ define amdgpu_kernel void @singlethread_acq_rel_fence() {
; GFX11-CU-LABEL: singlethread_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acq_rel
ret void
@@ -202,6 +228,14 @@ define amdgpu_kernel void @singlethread_seq_cst_fence() {
; GFX11-CU-LABEL: singlethread_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread") seq_cst
ret void
@@ -251,6 +285,14 @@ define amdgpu_kernel void @singlethread_one_as_acquire_fence() {
; GFX11-CU-LABEL: singlethread_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acquire
ret void
@@ -300,6 +342,14 @@ define amdgpu_kernel void @singlethread_one_as_release_fence() {
; GFX11-CU-LABEL: singlethread_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") release
ret void
@@ -349,6 +399,14 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel_fence() {
; GFX11-CU-LABEL: singlethread_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
@@ -398,6 +456,14 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst_fence() {
; GFX11-CU-LABEL: singlethread_one_as_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: singlethread_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: singlethread_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
@@ -447,6 +513,14 @@ define amdgpu_kernel void @wavefront_acquire_fence() {
; GFX11-CU-LABEL: wavefront_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acquire
ret void
@@ -496,6 +570,14 @@ define amdgpu_kernel void @wavefront_release_fence() {
; GFX11-CU-LABEL: wavefront_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") release
ret void
@@ -545,6 +627,14 @@ define amdgpu_kernel void @wavefront_acq_rel_fence() {
; GFX11-CU-LABEL: wavefront_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acq_rel
ret void
@@ -594,6 +684,14 @@ define amdgpu_kernel void @wavefront_seq_cst_fence() {
; GFX11-CU-LABEL: wavefront_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront") seq_cst
ret void
@@ -643,6 +741,14 @@ define amdgpu_kernel void @wavefront_one_as_acquire_fence() {
; GFX11-CU-LABEL: wavefront_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acquire
ret void
@@ -692,6 +798,14 @@ define amdgpu_kernel void @wavefront_one_as_release_fence() {
; GFX11-CU-LABEL: wavefront_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") release
ret void
@@ -741,6 +855,14 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel_fence() {
; GFX11-CU-LABEL: wavefront_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
@@ -790,6 +912,14 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst_fence() {
; GFX11-CU-LABEL: wavefront_one_as_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: wavefront_one_as_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: wavefront_one_as_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
@@ -843,6 +973,15 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX11-CU-LABEL: workgroup_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire
ret void
@@ -892,6 +1031,14 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX11-CU-LABEL: workgroup_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release
ret void
@@ -945,6 +1092,15 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX11-CU-LABEL: workgroup_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel
ret void
@@ -998,6 +1154,15 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX11-CU-LABEL: workgroup_seq_cst_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_seq_cst_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_seq_cst_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst
ret void
@@ -1051,6 +1216,15 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX11-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_acquire_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire
ret void
@@ -1100,6 +1274,14 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX11-CU-LABEL: workgroup_one_as_release_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_release_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_release_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release
ret void
@@ -1153,6 +1335,15 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: s_endpgm
+;
+; GFX12-WGP-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX12-WGP: ; %bb.0: ; %entry
+; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE
+; GFX12-WGP-NEXT: s_endpgm
+;
+; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
+; GFX12-CU: ; %bb.0: ; %entry
+; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
@@ -1206,6 +1397,15 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX1...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/83816
More information about the llvm-commits
mailing list