[llvm] 5d0e9dd - [AMDGPU][GlobalISel] Add support for global atomicrmw fadd
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 31 03:14:55 PDT 2021
Author: Jay Foad
Date: 2021-03-31T11:13:00+01:00
New Revision: 5d0e9ddfa512ea3b2dd500e7abe93af30c1d9e11
URL: https://github.com/llvm/llvm-project/commit/5d0e9ddfa512ea3b2dd500e7abe93af30c1d9e11
DIFF: https://github.com/llvm/llvm-project/commit/5d0e9ddfa512ea3b2dd500e7abe93af30c1d9e11.diff
LOG: [AMDGPU][GlobalISel] Add support for global atomicrmw fadd
This includes gfx908, which only has a no-return version of the
global_atomic_add_f32 instruction; it is handled using the same hack
that was previously implemented for selecting from the
llvm.amdgcn.global.atomic.fadd intrinsic.
Differential Revision: https://reviews.llvm.org/D97767
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Removed:
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index e2e0321d9f35d..d80e6c5d449bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1716,7 +1716,7 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_s_barrier:
return selectSBarrier(I);
case Intrinsic::amdgcn_global_atomic_fadd:
- return selectGlobalAtomicFaddIntrinsic(I);
+ return selectGlobalAtomicFadd(I, I.getOperand(2), I.getOperand(3));
default: {
return selectImpl(I, *CoverageInfo);
}
@@ -2319,6 +2319,13 @@ void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
MachineInstr &I) const {
+ if (I.getOpcode() == TargetOpcode::G_ATOMICRMW_FADD) {
+ const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
+ unsigned AS = PtrTy.getAddressSpace();
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS)
+ return selectGlobalAtomicFadd(I, I.getOperand(1), I.getOperand(2));
+ }
+
initM0(I);
return selectImpl(I, *CoverageInfo);
}
@@ -2960,11 +2967,14 @@ bool AMDGPUInstructionSelector::selectAMDGPU_BUFFER_ATOMIC_FADD(
return true;
}
-bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
- MachineInstr &MI) const{
+bool AMDGPUInstructionSelector::selectGlobalAtomicFadd(
+ MachineInstr &MI, MachineOperand &AddrOp, MachineOperand &DataOp) const {
- if (STI.hasGFX90AInsts())
+ if (STI.hasGFX90AInsts()) {
+ // gfx90a adds return versions of the global atomic fadd instructions so no
+ // special handling is required.
return selectImpl(MI, *CoverageInfo);
+ }
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
@@ -2981,9 +2991,9 @@ bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
// FIXME: This is only needed because tablegen requires number of dst operands
// in match and replace pattern to be the same. Otherwise patterns can be
// exported from SDag path.
- auto Addr = selectFlatOffsetImpl<true>(MI.getOperand(2));
+ auto Addr = selectFlatOffsetImpl<true>(AddrOp);
- Register Data = MI.getOperand(3).getReg();
+ Register Data = DataOp.getReg();
const unsigned Opc = MRI->getType(Data).isVector() ?
AMDGPU::GLOBAL_ATOMIC_PK_ADD_F16 : AMDGPU::GLOBAL_ATOMIC_ADD_F32;
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 878e3a99b6acf..10220094004b2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -147,7 +147,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const;
bool selectG_SHUFFLE_VECTOR(MachineInstr &I) const;
bool selectAMDGPU_BUFFER_ATOMIC_FADD(MachineInstr &I) const;
- bool selectGlobalAtomicFaddIntrinsic(MachineInstr &I) const;
+ bool selectGlobalAtomicFadd(MachineInstr &I, MachineOperand &AddrOp,
+ MachineOperand &DataOp) const;
bool selectBVHIntrinsic(MachineInstr &I) const;
std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5c8da8bee2651..d86f277bf3e20 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1291,12 +1291,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
}
+ auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD);
if (ST.hasLDSFPAtomics()) {
- auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
- .legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
+ Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
if (ST.hasGFX90AInsts())
Atomic.legalFor({{S64, LocalPtr}});
}
+ if (ST.hasAtomicFaddInsts())
+ Atomic.legalFor({{S32, GlobalPtr}});
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
// demarshalling
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir
new file mode 100644
index 0000000000000..0abf6f613adab
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-global.mir
@@ -0,0 +1,22 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -O0 -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -O0 -run-pass=legalizer %s -o - | FileCheck %s
+
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel.*' -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(s32) = G_ATOMICRMW_FADD %0:_(p1), %1:_ :: (load store seq_cst 4, addrspace 1) (in function: atomicrmw_fadd_global_i32)
+
+---
+name: atomicrmw_fadd_global_i32
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-LABEL: name: atomicrmw_fadd_global_i32
+ ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = G_ATOMICRMW_FADD %0, %1 :: (load store seq_cst 4, addrspace 1)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir
similarity index 100%
rename from llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd.mir
rename to llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomicrmw-fadd-local.mir
More information about the llvm-commits mailing list