[llvm] cdd45d5 - AMDGPU/GlobalISel: Select llvm.amdgcn.global.atomic.csub
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 29 05:27:37 PDT 2020
Author: Matt Arsenault
Date: 2020-07-29T08:27:31-04:00
New Revision: cdd45d5f9cf61e4a4c0bb01d5cdcdd3d12ffb10f
URL: https://github.com/llvm/llvm-project/commit/cdd45d5f9cf61e4a4c0bb01d5cdcdd3d12ffb10f
DIFF: https://github.com/llvm/llvm-project/commit/cdd45d5f9cf61e4a4c0bb01d5cdcdd3d12ffb10f.diff
LOG: AMDGPU/GlobalISel: Select llvm.amdgcn.global.atomic.csub
Remove the custom node boilerplate. It is unclear why this intrinsic was
previously routed through the M0-glued LDS atomic handling; csub only
operates on global memory.
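For reference, the intrinsic is now matched directly from IR; a minimal
example in the style of the new test below (the function name here is
illustrative):

  define i32 @csub_example(i32 addrspace(1)* %ptr, i32 %data) {
    ; Perform the global csub atomic and return its i32 result.
    %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data)
    ret i32 %ret
  }

  declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32)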
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/lib/Target/AMDGPU/FLATInstructions.td
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5539f4e8699d..368af73b18d3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -716,8 +716,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
(Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
Opc == ISD::ATOMIC_LOAD_FADD ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FMAX ||
- Opc == AMDGPUISD::ATOMIC_LOAD_CSUB)) {
+ Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
N = glueCopyToM0LDSInit(N);
SelectCode(N);
return;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f7cdc7461eac..3ffd9b79864a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4336,7 +4336,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ATOMIC_DEC)
NODE_NAME_CASE(ATOMIC_LOAD_FMIN)
NODE_NAME_CASE(ATOMIC_LOAD_FMAX)
- NODE_NAME_CASE(ATOMIC_LOAD_CSUB)
NODE_NAME_CASE(BUFFER_LOAD)
NODE_NAME_CASE(BUFFER_LOAD_UBYTE)
NODE_NAME_CASE(BUFFER_LOAD_USHORT)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 85f23c81db17..e995fdcd29ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -508,7 +508,6 @@ enum NodeType : unsigned {
ATOMIC_DEC,
ATOMIC_LOAD_FMIN,
ATOMIC_LOAD_FMAX,
- ATOMIC_LOAD_CSUB,
BUFFER_LOAD,
BUFFER_LOAD_UBYTE,
BUFFER_LOAD_USHORT,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 93006bb68ed3..d53246967f45 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4205,6 +4205,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax:
+ case Intrinsic::amdgcn_global_atomic_csub:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index d175edd93c64..bb04ae06bc3b 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1007,7 +1007,7 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
let SubtargetPredicate = HasGFX10_BEncoding in
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
- "buffer_atomic_csub", VGPR_32, i32, atomic_csub_global_32
+ "buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
>;
let SubtargetPredicate = isGFX8GFX9 in {
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 69facada2e96..f99354b77272 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -662,7 +662,7 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
let SubtargetPredicate = HasGFX10_BEncoding in
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
- VGPR_32, i32, atomic_csub_global_32>;
+ VGPR_32, i32, int_amdgcn_global_atomic_csub>;
} // End is_flat_global = 1
@@ -959,7 +959,7 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CSUB_RTN, atomic_csub_global_32, i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CSUB_RTN, int_amdgcn_global_atomic_csub, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4c9fce92f9a2..db280d9fc85b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7196,19 +7196,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
- case Intrinsic::amdgcn_global_atomic_csub: {
- MemSDNode *M = cast<MemSDNode>(Op);
- SDValue Ops[] = {
- M->getOperand(0), // Chain
- M->getOperand(2), // Ptr
- M->getOperand(3) // Value
- };
-
- return DAG.getMemIntrinsicNode(AMDGPUISD::ATOMIC_LOAD_CSUB, SDLoc(Op),
- M->getVTList(), Ops, M->getMemoryVT(),
- M->getMemOperand());
- }
-
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index b6c2082b8d23..d146114074a7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -55,10 +55,6 @@ def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
-def SIatomic_csub : SDNode<"AMDGPUISD::ATOMIC_LOAD_CSUB", SDTAtomic2,
- [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
->;
-
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;
@@ -311,10 +307,6 @@ class isPackedType<ValueType SrcVT> {
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//
-let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_global").AddrSpaces in {
-defm atomic_csub_global : binary_atomic_op<SIatomic_csub>;
-}
-
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
@@ -668,7 +660,6 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
-defm atomic_load_csub : SIAtomicM0Glue2 <"LOAD_CSUB", 1>;
defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
new file mode 100644
index 000000000000..9bc56d124a9c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+
+define i32 @global_atomic_csub(i32 addrspace(1)* %ptr, i32 %data) {
+; GCN-LABEL: global_atomic_csub:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_waitcnt_vscnt null, 0x0
+; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data)
+ ret i32 %ret
+}
+
+define i32 @global_atomic_csub_offset(i32 addrspace(1)* %ptr, i32 %data) {
+; GCN-LABEL: global_atomic_csub_offset:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_waitcnt_vscnt null, 0x0
+; GCN-NEXT: s_movk_i32 s4, 0x1000
+; GCN-NEXT: s_mov_b32 s5, 0
+; GCN-NEXT: v_mov_b32_e32 v3, s4
+; GCN-NEXT: v_mov_b32_e32 v4, s5
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v3
+; GCN-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
+; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
+ %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
+ ret i32 %ret
+}
+
+define void @global_atomic_csub_nortn(i32 addrspace(1)* %ptr, i32 %data) {
+; GCN-LABEL: global_atomic_csub_nortn:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_waitcnt_vscnt null, 0x0
+; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data)
+ ret void
+}
+
+define void @global_atomic_csub_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) {
+; GCN-LABEL: global_atomic_csub_offset_nortn:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_waitcnt_vscnt null, 0x0
+; GCN-NEXT: s_movk_i32 s4, 0x1000
+; GCN-NEXT: s_mov_b32 s5, 0
+; GCN-NEXT: v_mov_b32_e32 v3, s4
+; GCN-NEXT: v_mov_b32_e32 v4, s5
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v3
+; GCN-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
+; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
+ %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset(i32 addrspace(1)* %ptr, i32 %data) {
+; GCN-LABEL: global_atomic_csub_sgpr_base_offset:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_clause 0x1
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GCN-NEXT: s_load_dword s2, s[4:5], 0x8
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_u32 s0, s0, 0x1000
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: v_mov_b32_e32 v2, s2
+; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: global_store_dword v[0:1], v0, off
+; GCN-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
+ %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
+ store i32 %ret, i32 addrspace(1)* undef
+ ret void
+}
+
+define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) {
+; GCN-LABEL: global_atomic_csub_sgpr_base_offset_nortn:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_clause 0x1
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GCN-NEXT: s_load_dword s2, s[4:5], 0x8
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_u32 s0, s0, 0x1000
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: v_mov_b32_e32 v2, s2
+; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc
+; GCN-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024
+ %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data)
+ ret void
+}
+
+declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #1
+
+attributes #0 = { nounwind willreturn }
+attributes #1 = { argmemonly nounwind }