[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Jun 23 13:18:30 PDT 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/96444
None
>From a663c429ebe7a05754c771d67856a7cdb20cc11d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 23 Jun 2024 17:07:53 +0200
Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +++++++++-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++++++
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +-
3 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5f798b4391704..fe3cd75d81009 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
"Has flat_atomic_add_f32 instruction"
>;
+def FeatureFlatBufferGlobalAtomicFaddF64Inst
+ : SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
+ "HasFlatBufferGlobalAtomicFaddF64Inst",
+ "true",
+ "Has flat, buffer, and global instructions for f64 atomic fadd"
+>;
+
def FeatureMemoryAtomicFaddF32DenormalSupport
: SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
"HasAtomicMemoryAtomicFaddF32DenormalSupport",
@@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureBackOffBarrier,
FeatureKernargPreload,
FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureAtomicFMinFMaxF64FlatInsts
+ FeatureAtomicFMinFMaxF64FlatInsts,
+ FeatureFlatBufferGlobalAtomicFaddF64Inst
])>;
def FeatureISAVersion9_0_C : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 674d84422538f..922435c5efaa6 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicGlobalPkAddBF16Inst = false;
bool HasAtomicBufferPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
+ bool HasFlatBufferGlobalAtomicFaddF64Inst = false;
bool HasDefaultComponentZero = false;
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
bool HasDefaultComponentBroadcast = false;
@@ -873,6 +874,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
+ /// \return true if the target has flat, global, and buffer atomic fadd for
+ /// double.
+ bool hasFlatBufferGlobalAtomicFaddF64Inst() const {
+ return HasFlatBufferGlobalAtomicFaddF64Inst;
+ }
+
/// \return true if the target's flat, global, and buffer atomic fadd for
/// float supports denormal handling.
bool hasMemoryAtomicFaddF32DenormalSupport() const {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index eec750e5b8251..6b5ba160d6402 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16028,7 +16028,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AtomicExpansionKind::CmpXChg;
// global and flat atomic fadd f64: gfx90a, gfx940.
- if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
+ if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
More information about the llvm-branch-commits
mailing list