[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jun 24 06:42:42 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444
>From 80c3f71f03d3b2ccbcd418d76d417f2a243fdbe4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 23 Jun 2024 17:07:53 +0200
Subject: [PATCH 1/2] AMDGPU: Add subtarget feature for memory atomic fadd f64
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 10 +++++++++-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 7 +++++++
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +-
3 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0ec65f759bc35..028c54d8d94d2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
"Has flat_atomic_add_f32 instruction"
>;
+def FeatureFlatBufferGlobalAtomicFaddF64Inst
+ : SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
+ "HasFlatBufferGlobalAtomicFaddF64Inst",
+ "true",
+ "Has flat, buffer, and global instructions for f64 atomic fadd"
+>;
+
def FeatureMemoryAtomicFaddF32DenormalSupport
: SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
"HasAtomicMemoryAtomicFaddF32DenormalSupport",
@@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureBackOffBarrier,
FeatureKernargPreload,
FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureAtomicFMinFMaxF64FlatInsts
+ FeatureAtomicFMinFMaxF64FlatInsts,
+ FeatureFlatBufferGlobalAtomicFaddF64Inst
])>;
def FeatureISAVersion9_0_C : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 674d84422538f..922435c5efaa6 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicGlobalPkAddBF16Inst = false;
bool HasAtomicBufferPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
+ bool HasFlatBufferGlobalAtomicFaddF64Inst = false;
bool HasDefaultComponentZero = false;
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
bool HasDefaultComponentBroadcast = false;
@@ -873,6 +874,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
+ /// \return true if the target has flat, global, and buffer atomic fadd for
+ /// double.
+ bool hasFlatBufferGlobalAtomicFaddF64Inst() const {
+ return HasFlatBufferGlobalAtomicFaddF64Inst;
+ }
+
/// \return true if the target's flat, global, and buffer atomic fadd for
/// float supports denormal handling.
bool hasMemoryAtomicFaddF32DenormalSupport() const {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index eec750e5b8251..6b5ba160d6402 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16028,7 +16028,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AtomicExpansionKind::CmpXChg;
// global and flat atomic fadd f64: gfx90a, gfx940.
- if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
+ if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
>From c1354032fc55234ffddf9136f17f5ee400c01c16 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 24 Jun 2024 15:42:17 +0200
Subject: [PATCH 2/2] Add to gfx940
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 028c54d8d94d2..3ed68a259ca15 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1441,7 +1441,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF64FlatInsts,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
- FeatureMemoryAtomicFaddF32DenormalSupport
+ FeatureMemoryAtomicFaddF32DenormalSupport,
+ FeatureFlatBufferGlobalAtomicFaddF64Inst
]>;
def FeatureISAVersion9_4_0 : FeatureSet<
More information about the llvm-branch-commits mailing list