[llvm-branch-commits] [llvm] AMDGPU: Add subtarget feature for memory atomic fadd f64 (PR #96444)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jun 26 02:34:07 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96444
From db519863301bd95fe0d50b56d74584b0f7f2fbf6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 23 Jun 2024 17:07:53 +0200
Subject: [PATCH] AMDGPU: Add subtarget feature for memory atomic fadd f64
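
As a sketch of what this feature gates (illustrative IR only; the function
name is hypothetical and the usual shouldExpandAtomicRMWInIR checks are
assumed to pass):

  define double @global_fadd_f64(ptr addrspace(1) %ptr, double %v) {
    %r = atomicrmw fadd ptr addrspace(1) %ptr, double %v syncscope("agent") monotonic
    ret double %r
  }

On subtargets with the feature (the gfx90a and gfx940 families) this can
select directly to global_atomic_add_f64 rather than expanding to a
compare-exchange loop.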
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 21 ++++++++++++++-------
llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++++++----
llvm/lib/Target/AMDGPU/FLATInstructions.td | 6 +++---
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 10 +++++++---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +-
5 files changed, 31 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 34c6f6ff19bff..84ea040477763 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,13 @@ def FeatureFlatAtomicFaddF32Inst
"Has flat_atomic_add_f32 instruction"
>;
+def FeatureFlatBufferGlobalAtomicFaddF64Inst
+ : SubtargetFeature<"flat-buffer-global-fadd-f64-inst",
+ "HasFlatBufferGlobalAtomicFaddF64Inst",
+ "true",
+ "Has flat, buffer, and global instructions for f64 atomic fadd"
+>;
+
def FeatureMemoryAtomicFAddF32DenormalSupport
: SubtargetFeature<"memory-atomic-fadd-f32-denormal-support",
"HasMemoryAtomicFaddF32DenormalSupport",
@@ -1390,7 +1397,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureBackOffBarrier,
FeatureKernargPreload,
FeatureAtomicFMinFMaxF64GlobalInsts,
- FeatureAtomicFMinFMaxF64FlatInsts
+ FeatureAtomicFMinFMaxF64FlatInsts,
+ FeatureFlatBufferGlobalAtomicFaddF64Inst
])>;
def FeatureISAVersion9_0_C : FeatureSet<
@@ -1435,7 +1443,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
FeatureAtomicFMinFMaxF64GlobalInsts,
FeatureAtomicFMinFMaxF64FlatInsts,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
- FeatureMemoryAtomicFAddF32DenormalSupport
+ FeatureMemoryAtomicFAddF32DenormalSupport,
+ FeatureFlatBufferGlobalAtomicFaddF64Inst
]>;
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -1932,11 +1941,9 @@ def isGFX12Plus :
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
-
-def HasBufferFlatGlobalAtomicsF64 : // FIXME: Rename to show it's only for fadd
- Predicate<"Subtarget->hasBufferFlatGlobalAtomicsF64()">,
- // FIXME: This is too coarse, and working around using pseudo's predicates on real instruction.
- AssemblerPredicate<(any_of FeatureGFX90AInsts, FeatureGFX10Insts, FeatureSouthernIslands, FeatureSeaIslands)>;
+def HasFlatBufferGlobalAtomicFaddF64Inst :
+ Predicate<"Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst()">,
+ AssemblerPredicate<(any_of FeatureFlatBufferGlobalAtomicFaddF64Inst)>;
def HasAtomicFMinFMaxF32GlobalInsts :
Predicate<"Subtarget->hasAtomicFMinFMaxF32GlobalInsts()">,
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 3b8d94b744000..a904c8483dbf5 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1312,14 +1312,16 @@ let SubtargetPredicate = isGFX90APlus in {
}
} // End SubtargetPredicate = isGFX90APlus
-let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
+let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>;
+} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
+let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
// Note the names can be buffer_atomic_fmin_x2/buffer_atomic_fmax_x2
// depending on some subtargets.
defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>;
defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>;
-} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
+}
def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
let SubtargetPredicate = isGFX940Plus;
@@ -1836,9 +1838,9 @@ let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
} // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts
-let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
+let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
-} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
+} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 98054dde398b3..89946a4719557 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -772,10 +772,10 @@ defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64",
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
}
-let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
+let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
-} // End SubtargetPredicate = HasBufferFlatGlobalAtomicsF64
+} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst
let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
@@ -1654,7 +1654,7 @@ defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin",
defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
}
-let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
+let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 11894174bdffe..e5817594a4521 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicGlobalPkAddBF16Inst = false;
bool HasAtomicBufferPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
+ bool HasFlatBufferGlobalAtomicFaddF64Inst = false;
bool HasDefaultComponentZero = false;
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
bool HasDefaultComponentBroadcast = false;
@@ -660,9 +661,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return GFX10_BEncoding;
}
- // BUFFER/FLAT/GLOBAL_ATOMIC_ADD/MIN/MAX_F64
- bool hasBufferFlatGlobalAtomicsF64() const { return hasGFX90AInsts(); }
-
bool hasExportInsts() const {
return !hasGFX940Insts();
}
@@ -873,6 +871,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
+ /// \return true if the target has flat, global, and buffer atomic fadd for
+ /// double.
+ bool hasFlatBufferGlobalAtomicFaddF64Inst() const {
+ return HasFlatBufferGlobalAtomicFaddF64Inst;
+ }
+
/// \return true if the target's flat, global, and buffer atomic fadd for
/// float supports denormal handling.
bool hasMemoryAtomicFaddF32DenormalSupport() const {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b8ff5ed35ac80..fc34277c580a8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16199,7 +16199,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AtomicExpansionKind::CmpXChg;
// global and flat atomic fadd f64: gfx90a, gfx940.
- if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
+ if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
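
For reference, a minimal lit-style test sketch exercising the renamed
predicate (not included in this patch; it assumes the
"amdgpu-unsafe-fp-atomics" function attribute still selects the hardware
instruction on these targets):

  ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
  ; CHECK-LABEL: flat_fadd_f64:
  ; CHECK: flat_atomic_add_f64
  define double @flat_fadd_f64(ptr %ptr, double %v) #0 {
    %r = atomicrmw fadd ptr %ptr, double %v monotonic
    ret double %r
  }
  attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" }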