[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for global atomic fadd denormal support (PR #96443)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Jun 23 13:20:21 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Not sure what the behavior for gfx90a is. The SPG says it always flushes
denormals, but the instruction documentation says it does not.
---
Full diff: https://github.com/llvm/llvm-project/pull/96443.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+11-2)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+7)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 7ff861f5b144d..5f798b4391704 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
"Has flat_atomic_add_f32 instruction"
>;
+def FeatureMemoryAtomicFaddF32DenormalSupport
+ : SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
+ "HasAtomicMemoryAtomicFaddF32DenormalSupport",
+ "true",
+ "global/flat/buffer atomic fadd for float supports denormal handling"
+>;
+
def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
: SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
"HasAgentScopeFineGrainedRemoteMemoryAtomics",
@@ -1425,7 +1432,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureKernargPreload,
FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF64FlatInsts,
- FeatureAgentScopeFineGrainedRemoteMemoryAtomics
+ FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
+ FeatureMemoryAtomicFaddF32DenormalSupport
]>;
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -1628,7 +1636,8 @@ def FeatureISAVersion12 : FeatureSet<
FeatureVGPRSingleUseHintInsts,
FeatureScalarDwordx3Loads,
FeatureDPPSrc1SGPR,
- FeatureMaxHardClauseLength32]>;
+ FeatureMaxHardClauseLength32,
+ FeatureMemoryAtomicFaddF32DenormalSupport]>;
def FeatureISAVersion12_Generic: FeatureSet<
!listconcat(FeatureISAVersion12.Features,
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index c40efbdcf7f0b..674d84422538f 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -167,6 +167,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicFlatPkAdd16Insts = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
+ bool HasAtomicMemoryAtomicFaddF32DenormalSupport = false;
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
bool HasAtomicBufferGlobalPkAddF16Insts = false;
bool HasAtomicCSubNoRtnInsts = false;
@@ -872,6 +873,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
+ /// \return true if the target's flat, global, and buffer atomic fadd for
+ /// float supports denormal handling.
+ bool hasMemoryAtomicFaddF32DenormalSupport() const {
+ return HasAtomicMemoryAtomicFaddF32DenormalSupport;
+ }
+
/// \return true if atomic operations targeting fine-grained memory work
/// correctly at device scope, in allocations in host or peer PCIe device
/// memory.
``````````
</details>
https://github.com/llvm/llvm-project/pull/96443
More information about the llvm-branch-commits mailing list