[llvm-branch-commits] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jun 25 02:10:17 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96442
From 1a441c05eb510f3310604594b2687ddf90e884fe Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 23 Jun 2024 22:02:36 +0200
Subject: [PATCH] AMDGPU: Add a subtarget feature for fine-grained remote memory support
Atomic access to fine-grained remote memory does not work on all
subtargets. Add a feature for targets where this is expected to work.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 16 ++++++++++++++--
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 8 ++++++++
2 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5bc0fe8bba608..4d2faacaa915b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,16 @@ def FeatureFlatAtomicFaddF32Inst
"Has flat_atomic_add_f32 instruction"
>;
+def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
+ : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
+ "HasAgentScopeFineGrainedRemoteMemoryAtomics",
+ "true",
+ "Agent (device) scoped atomic operations, excluding those directly "
+ "supported by PCIe (i.e. integer atomic add, exchange, and "
+ "compare-and-swap), are functional for allocations in host or peer "
+ "device memory."
+>;
+
def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero",
"HasDefaultComponentZero",
"true",
@@ -1207,7 +1217,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast,
FeatureMaxHardClauseLength32,
- FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
+ FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
+ FeatureAgentScopeFineGrainedRemoteMemoryAtomics
]
>;
@@ -1415,7 +1426,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureBackOffBarrier,
FeatureKernargPreload,
FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureAtomicFMinFMaxF64FlatInsts
+ FeatureAtomicFMinFMaxF64FlatInsts,
+ FeatureAgentScopeFineGrainedRemoteMemoryAtomics
]>;
def FeatureISAVersion9_4_0 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 966708db4f37c..c40efbdcf7f0b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicBufferPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
bool HasDefaultComponentZero = false;
+ bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
bool HasDefaultComponentBroadcast = false;
/// The maximum number of instructions that may be placed within an S_CLAUSE,
/// which is one greater than the maximum argument to S_CLAUSE. A value of 0
@@ -871,6 +872,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
+ /// \return true if atomic operations targeting fine-grained memory work
+ /// correctly at device scope, in allocations in host or peer PCIe device
+ /// memory.
+ bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const {
+ return HasAgentScopeFineGrainedRemoteMemoryAtomics;
+ }
+
bool hasDefaultComponentZero() const { return HasDefaultComponentZero; }
bool hasDefaultComponentBroadcast() const {
More information about the llvm-branch-commits mailing list