[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jun 27 02:10:34 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96443
>From eaa00157741d5e4f134df22ed27a80fe3d853e6e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 23 Jun 2024 16:44:08 +0200
Subject: [PATCH 1/3] AMDGPU: Add subtarget feature for global atomic fadd
denormal support
Not sure what the behavior for gfx90a is. The SPG says it always flushes.
The instruction documentation says it does not.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 14 ++++++++++++--
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++++++
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 56ec5e9c4cfc2..6b212e1b2af03 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
"Has flat_atomic_add_f32 instruction"
>;
+def FeatureMemoryAtomicFaddF32DenormalSupport
+ : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
+ "HasAtomicMemoryAtomicFaddF32DenormalSupport",
+ "true",
+ "global/flat/buffer atomic fadd for float supports denormal handling"
+>;
+
def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
: SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
"HasAgentScopeFineGrainedRemoteMemoryAtomics",
@@ -1427,7 +1434,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureKernargPreload,
FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureAgentScopeFineGrainedRemoteMemoryAtomics
+ FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
+ FeatureMemoryAtomicFaddF32DenormalSupport
]>;
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -1631,7 +1639,9 @@ def FeatureISAVersion12 : FeatureSet<
FeatureScalarDwordx3Loads,
FeatureDPPSrc1SGPR,
FeatureMaxHardClauseLength32,
- Feature1_5xVGPRs]>;
+ Feature1_5xVGPRs,
+ FeatureMemoryAtomicFaddF32DenormalSupport]>;
+ ]>;
def FeatureISAVersion12_Generic: FeatureSet<
!listconcat(FeatureISAVersion12.Features,
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 9e2a316a9ed28..db0b2b67a0388 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicFlatPkAdd16Insts = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
+ bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false;
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
bool HasAtomicBufferGlobalPkAddF16Insts = false;
bool HasAtomicCSubNoRtnInsts = false;
@@ -872,6 +873,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
+ /// \return true if the target's flat, global, and buffer atomic fadd for
+ /// float supports denormal handling.
+ bool hasMemoryAtomicFaddF32DenormalSupport() const {
+ return HasAtomicMemoryAtomicFaddF32DenormalSupport;
+ }
+
/// \return true if atomic operations targeting fine-grained memory work
/// correctly at device scope, in allocations in host or peer PCIe device
/// memory.
>From 84c8e017f521236c51a75a275c24f87dc919fd4b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 24 Jun 2024 12:10:37 +0200
Subject: [PATCH 2/3] Add to gfx11.
RDNA 3 manual says "Floating-point addition handles NAN/INF/denorm"
though I'm not sure I trust it.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 6b212e1b2af03..39a1d629a4aea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1547,7 +1547,8 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureFlatAtomicFaddF32Inst,
FeatureImageInsts,
FeaturePackedTID,
- FeatureVcmpxPermlaneHazard]>;
+ FeatureVcmpxPermlaneHazard,
+ FeatureMemoryAtomicFaddF32DenormalSupport]>;
// There are few workarounds that need to be
// added to all targets. This pessimizes codegen
@@ -1640,7 +1641,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureDPPSrc1SGPR,
FeatureMaxHardClauseLength32,
Feature1_5xVGPRs,
- FeatureMemoryAtomicFaddF32DenormalSupport]>;
+ FeatureMemoryAtomicFaddF32DenormalSupport
]>;
def FeatureISAVersion12_Generic: FeatureSet<
>From 5a627920d5c77a3b1d9b9ec1ddef1aa31fa1cf09 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 26 Jun 2024 11:30:51 +0200
Subject: [PATCH 3/3] Rename
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +++++-----
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++--
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 39a1d629a4aea..34c6f6ff19bff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,9 +788,9 @@ def FeatureFlatAtomicFaddF32Inst
"Has flat_atomic_add_f32 instruction"
>;
-def FeatureMemoryAtomicFaddF32DenormalSupport
+def FeatureMemoryAtomicFAddF32DenormalSupport
: SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
- "HasAtomicMemoryAtomicFaddF32DenormalSupport",
+ "HasMemoryAtomicFaddF32DenormalSupport",
"true",
"global/flat/buffer atomic fadd for float supports denormal handling"
>;
@@ -1435,7 +1435,7 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF64FlatInsts,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
- FeatureMemoryAtomicFaddF32DenormalSupport
+ FeatureMemoryAtomicFAddF32DenormalSupport
]>;
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -1548,7 +1548,7 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureImageInsts,
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
- FeatureMemoryAtomicFaddF32DenormalSupport]>;
+ FeatureMemoryAtomicFAddF32DenormalSupport]>;
// There are few workarounds that need to be
// added to all targets. This pessimizes codegen
@@ -1641,7 +1641,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureDPPSrc1SGPR,
FeatureMaxHardClauseLength32,
Feature1_5xVGPRs,
- FeatureMemoryAtomicFaddF32DenormalSupport
+ FeatureMemoryAtomicFAddF32DenormalSupport
]>;
def FeatureISAVersion12_Generic: FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index db0b2b67a0388..11894174bdffe 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -167,7 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicFlatPkAdd16Insts = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
- bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false;
+ bool HasMemoryAtomicFaddF32DenormalSupport = false;
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
bool HasAtomicBufferGlobalPkAddF16Insts = false;
bool HasAtomicCSubNoRtnInsts = false;
@@ -876,7 +876,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \return true if the target's flat, global, and buffer atomic fadd for
/// float supports denormal handling.
bool hasMemoryAtomicFaddF32DenormalSupport() const {
- return HasAtomicMemoryAtomicFaddF32DenormalSupport;
+ return HasMemoryAtomicFaddF32DenormalSupport;
}
/// \return true if atomic operations targeting fine-grained memory work
More information about the llvm-branch-commits
mailing list