[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Jun 23 13:21:27 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/96444.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+9-1)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+7)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+1-1)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5f798b4391704..fe3cd75d81009 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
"Has flat_atomic_add_f32 instruction"
>;
+def FeatureFlatBufferGlobalAtomicFaddF64Inst
+ : SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
+ "HasFlatBufferGlobalAtomicFaddF64Inst",
+ "true",
+ "Has flat, buffer, and global instructions for f64 atomic fadd"
+>;
+
def FeatureMemoryAtomicFaddF32DenormalSupport
: SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
"HasAtomicMemoryAtomicFaddF32DenormalSupport",
@@ -1388,7 +1395,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureBackOffBarrier,
FeatureKernargPreload,
FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureAtomicFMinFMaxF64FlatInsts
+ FeatureAtomicFMinFMaxF64FlatInsts,
+ FeatureFlatBufferGlobalAtomicFaddF64Inst
])>;
def FeatureISAVersion9_0_C : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 674d84422538f..922435c5efaa6 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicGlobalPkAddBF16Inst = false;
bool HasAtomicBufferPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
+ bool HasFlatBufferGlobalAtomicFaddF64Inst = false;
bool HasDefaultComponentZero = false;
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
bool HasDefaultComponentBroadcast = false;
@@ -873,6 +874,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
+ /// \return true if the target has flat, global, and buffer atomic fadd for
+ /// double.
+ bool hasFlatBufferGlobalAtomicFaddF64Inst() const {
+ return HasFlatBufferGlobalAtomicFaddF64Inst;
+ }
+
/// \return true if the target's flat, global, and buffer atomic fadd for
/// float supports denormal handling.
bool hasMemoryAtomicFaddF32DenormalSupport() const {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index eec750e5b8251..6b5ba160d6402 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16028,7 +16028,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AtomicExpansionKind::CmpXChg;
// global and flat atomic fadd f64: gfx90a, gfx940.
- if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
+ if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
``````````
</details>
https://github.com/llvm/llvm-project/pull/96444
More information about the llvm-branch-commits
mailing list