[llvm] r335325 - AMDGPU: Add patterns for i32/i64 local atomic load/store
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 22 01:39:52 PDT 2018
Author: arsenm
Date: Fri Jun 22 01:39:52 2018
New Revision: 335325
URL: http://llvm.org/viewvc/llvm-project?rev=335325&view=rev
Log:
AMDGPU: Add patterns for i32/i64 local atomic load/store
Not sure why the 32/64 split is needed in the atomic_load/store
hierarchies. The regular PatFrags do this, but we don't do it
in the existing handling for global.
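
As a quick illustration (a minimal sketch mirroring the new tests below;
the function name is made up), a monotonic LDS atomic load now selects
directly to a DS instruction:

  define i32 @lds_atomic_load_example(i32 addrspace(3)* %ptr) {
    ; selects to ds_read_b32 (plus an m0 init on pre-gfx9 targets)
    %v = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4
    ret i32 %v
  }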
Added:
llvm/trunk/test/CodeGen/AMDGPU/atomic_load_local.ll
llvm/trunk/test/CodeGen/AMDGPU/atomic_store_local.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/trunk/lib/Target/AMDGPU/DSInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=335325&r1=335324&r2=335325&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Fri Jun 22 01:39:52 2018
@@ -564,7 +564,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
return;
}
case ISD::LOAD:
- case ISD::STORE: {
+ case ISD::STORE:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: {
N = glueCopyToM0(N);
break;
}
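
This routes atomic loads/stores through glueCopyToM0 as well, so LDS
atomics get the same m0 initialization as ordinary LDS loads/stores on
targets that require it (see the LDSRequiresM0Init predicate below and
the CI checks in the new tests).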
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=335325&r1=335324&r2=335325&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Fri Jun 22 01:39:52 2018
@@ -365,12 +365,15 @@ def az_extloadi8_local : LocalLoad <az_e
def sextloadi8_local : LocalLoad <sextloadi8>;
def az_extloadi16_local : LocalLoad <az_extloadi16>;
def sextloadi16_local : LocalLoad <sextloadi16>;
+def atomic_load_32_local : LocalLoad<atomic_load_32>;
+def atomic_load_64_local : LocalLoad<atomic_load_64>;
def store_local : LocalStore <store>;
def truncstorei8_local : LocalStore <truncstorei8>;
def truncstorei16_local : LocalStore <truncstorei16>;
def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
+def atomic_store_local : LocalStore <atomic_store>;
def load_align8_local : Aligned8Bytes <
(ops node:$ptr), (load_local node:$ptr)
Modified: llvm/trunk/lib/Target/AMDGPU/DSInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/DSInstructions.td?rev=335325&r1=335324&r2=335325&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/DSInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/DSInstructions.td Fri Jun 22 01:39:52 2018
@@ -647,6 +647,8 @@ defm : DSReadPat_mc <DS_READ_I16, i32, "
defm : DSReadPat_mc <DS_READ_U16, i32, "az_extloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "load_local">;
+defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
+defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
let AddedComplexity = 100 in {
@@ -683,11 +685,30 @@ multiclass DSWritePat_mc <DS_Pseudo inst
}
}
+// Irritatingly, atomic_store reverses the order of operands from a
+// normal store.
+class DSAtomicWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+>;
+
+multiclass DSAtomicWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
+ let OtherPredicates = [LDSRequiresM0Init] in {
+ def : DSAtomicWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+ }
+
+ let OtherPredicates = [NotLDSRequiresM0Init] in {
+ def : DSAtomicWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+ }
+}
+
defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
defm : DSWritePat_mc <DS_WRITE_B32, i32, "store_local">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local">;
let OtherPredicates = [D16PreservesUnusedBits] in {
def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_local_hi16>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=335325&r1=335324&r2=335325&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Fri Jun 22 01:39:52 2018
@@ -233,6 +233,10 @@ def AMDGPUld_glue : SDNode <"ISD::LOAD",
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;
+def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
+>;
+
def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr), [{
return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
}]>;
@@ -241,6 +245,18 @@ def load_glue : PatFrag <(ops node:$ptr)
return cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
}]>;
+def atomic_load_32_glue : PatFrag<(ops node:$ptr),
+ (AMDGPUatomic_ld_glue node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i32;
+}
+
+def atomic_load_64_glue : PatFrag<(ops node:$ptr),
+ (AMDGPUatomic_ld_glue node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i64;
+}
+
def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr), [{
return cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
}]>;
@@ -286,12 +302,22 @@ def az_extloadi8_local_m0 : LoadFrag<az_
def az_extloadi16_local_m0 : LoadFrag<az_extloadi16_glue>, LocalAddress;
def load_align8_local_m0 : LoadFrag <load_glue_align8>, LocalAddress;
def load_align16_local_m0 : LoadFrag <load_glue_align16>, LocalAddress;
+def atomic_load_32_local_m0 : LoadFrag<atomic_load_32_glue>, LocalAddress;
+def atomic_load_64_local_m0 : LoadFrag<atomic_load_64_glue>, LocalAddress;
def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;
+def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
+>;
+
+def atomic_store_glue : PatFrag<(ops node:$ptr, node:$val),
+ (AMDGPUatomic_st_glue node:$ptr, node:$val)> {
+}
+
def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUst_glue node:$val, node:$ptr), [{
return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
@@ -328,6 +354,7 @@ def store_glue_align16 : Aligned16Bytes
def store_local_m0 : StoreFrag<store_glue>, LocalAddress;
def truncstorei8_local_m0 : StoreFrag<truncstorei8_glue>, LocalAddress;
def truncstorei16_local_m0 : StoreFrag<truncstorei16_glue>, LocalAddress;
+def atomic_store_local_m0 : StoreFrag<AMDGPUatomic_st_glue>, LocalAddress;
def store_align8_local_m0 : StoreFrag<store_glue_align8>, LocalAddress;
def store_align16_local_m0 : StoreFrag<store_glue_align16>, LocalAddress;
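
Note on the operand-order quirk that motivates DSAtomicWritePat above:
the store fragments here are written (ops node:$val, node:$ptr), while
atomic_store_glue takes (ops node:$ptr, node:$val), so the atomic write
pattern can't simply reuse DSWritePat.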
Added: llvm/trunk/test/CodeGen/AMDGPU/atomic_load_local.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/atomic_load_local.ll?rev=335325&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/atomic_load_local.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/atomic_load_local.ll Fri Jun 22 01:39:52 2018
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b32 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) {
+ %load = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i32_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
+ %load = load atomic i32, i32 addrspace(3)* %gep monotonic, align 4
+ ret i32 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) {
+ %load = load atomic i64, i64 addrspace(3)* %ptr monotonic, align 8
+ ret i64 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i64_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) {
+ %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16
+ %load = load atomic i64, i64 addrspace(3)* %gep monotonic, align 8
+ ret i64 %load
+}
Added: llvm/trunk/test/CodeGen/AMDGPU/atomic_store_local.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/atomic_store_local.ll?rev=335325&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/atomic_store_local.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/atomic_store_local.ll Fri Jun 22 01:39:52 2018
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b32 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %val) {
+ store atomic i32 %val, i32 addrspace(3)* %ptr monotonic, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b32 v0, v1 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i32(i32 addrspace(3)* %ptr, i32 %val) {
+ %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
+ store atomic i32 %val, i32 addrspace(3)* %gep monotonic, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b64 v0, v[1:2]{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i64(i64 addrspace(3)* %ptr, i64 %val) {
+ store atomic i64 %val, i64 addrspace(3)* %ptr monotonic, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b64 v0, v[1:2] offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i64(i64 addrspace(3)* %ptr, i64 %val) {
+ %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16
+ store atomic i64 %val, i64 addrspace(3)* %gep monotonic, align 8
+ ret void
+}
+