[llvm] AMDGPU: Refactor atomicrmw fadd expansion logic (NFC) (PR #89469)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 12:53:18 PDT 2024
================
@@ -16075,56 +16075,50 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
return AtomicExpansionKind::CmpXChg;
}
- if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy()))
+ if (!AMDGPU::isFlatGlobalAddrSpace(AS) &&
+ AS != AMDGPUAS::BUFFER_FAT_POINTER)
return AtomicExpansionKind::CmpXChg;
- if ((AMDGPU::isFlatGlobalAddrSpace(AS) ||
- AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
- Subtarget->hasAtomicFaddNoRtnInsts()) {
- if (Subtarget->hasGFX940Insts())
- return AtomicExpansionKind::None;
+ // TODO: gfx940 supports v2f16 and v2bf16
+ if (Subtarget->hasGFX940Insts() && (Ty->isFloatTy() || Ty->isDoubleTy()))
+ return AtomicExpansionKind::None;
- if (unsafeFPAtomicsDisabled(RMW->getFunction()))
- return AtomicExpansionKind::CmpXChg;
+ if (unsafeFPAtomicsDisabled(RMW->getFunction()))
+ return AtomicExpansionKind::CmpXChg;
- // Always expand system scope fp atomics.
- if (HasSystemScope)
- return AtomicExpansionKind::CmpXChg;
+ // Always expand system scope fp atomics.
+ if (HasSystemScope)
+ return AtomicExpansionKind::CmpXChg;
- if ((AMDGPU::isExtendedGlobalAddrSpace(AS) ||
- AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
- Ty->isFloatTy()) {
- // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
- if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
- return ReportUnsafeHWInst(AtomicExpansionKind::None);
- // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
- if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
- return ReportUnsafeHWInst(AtomicExpansionKind::None);
- }
+ // global and flat atomic fadd f64: gfx90a, gfx940.
+ if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
- // flat atomic fadd f32: gfx940, gfx11+.
- if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
- Subtarget->hasFlatAtomicFaddF32Inst())
+ if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
+ // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
+ if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
+ // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
+ if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
+ }
- // global and flat atomic fadd f64: gfx90a, gfx940.
- if (Ty->isDoubleTy() && Subtarget->hasGFX90AInsts())
+ // flat atomic fadd f32: gfx940, gfx11+.
----------------
rampitec wrote:
The comment seems to be wrong: gfx940 should already have been handled by this point (at the earlier hasGFX940Insts check).
https://github.com/llvm/llvm-project/pull/89469
More information about the llvm-commits mailing list