[llvm] r364814 - AMDGPU: Support GDS atomics
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 10:17:45 PDT 2019
Author: nha
Date: Mon Jul 1 10:17:45 2019
New Revision: 364814
URL: http://llvm.org/viewvc/llvm-project?rev=364814&view=rev
Log:
AMDGPU: Support GDS atomics
Summary:
Original patch by Marek Olšák
Change-Id: Ia97d5d685a63a377d86e82942436d1fe6e429bab
Reviewers: mareko, arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, jfb, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63452
Added:
llvm/trunk/test/CodeGen/AMDGPU/gds-atomic.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/trunk/lib/Target/AMDGPU/DSInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=364814&r1=364813&r2=364814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Mon Jul 1 10:17:45 2019
@@ -568,8 +568,6 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
- // Write max value to m0 before each load operation
-
assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
@@ -587,10 +585,17 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0
}
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
- if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
- !Subtarget->ldsRequiresM0Init())
- return N;
- return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+ unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
+ if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ if (Subtarget->ldsRequiresM0Init())
+ return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+ } else if (AS == AMDGPUAS::REGION_ADDRESS) {
+ MachineFunction &MF = CurDAG->getMachineFunction();
+ unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
+ return
+ glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
+ }
+ return N;
}
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=364814&r1=364813&r2=364814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Mon Jul 1 10:17:45 2019
@@ -353,6 +353,10 @@ class LocalAddress : CodePatPred<[{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
+class RegionAddress : CodePatPred<[{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+}]>;
+
class GlobalAddress : CodePatPred<[{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
@@ -402,6 +406,9 @@ class PrivateStore <SDPatternOperator op
class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
+class RegionLoad <SDPatternOperator op> : LoadFrag <op>, RegionAddress;
+class RegionStore <SDPatternOperator op> : StoreFrag <op>, RegionAddress;
+
class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalLoadAddress;
class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
@@ -497,6 +504,13 @@ class local_binary_atomic_op<SDNode atom
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
+class region_binary_atomic_op<SDNode atomic_op> :
+ PatFrag<(ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value), [{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+}]>;
+
+
def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
@@ -521,6 +535,13 @@ class AtomicCmpSwapLocal <SDNode cmp_swa
return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
+class AtomicCmpSwapRegion <SDNode cmp_swap_node> : PatFrag<
+ (ops node:$ptr, node:$cmp, node:$swap),
+ (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+}]>;
+
def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
multiclass global_binary_atomic_op<SDNode atomic_op> {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=364814&r1=364813&r2=364814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Mon Jul 1 10:17:45 2019
@@ -142,7 +142,7 @@ void AMDGPUTTIImpl::getUnrollingPreferen
unsigned Threshold = 0;
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
Threshold = ThresholdPrivate;
- else if (AS == AMDGPUAS::LOCAL_ADDRESS)
+ else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS)
Threshold = ThresholdLocal;
else
continue;
@@ -160,7 +160,8 @@ void AMDGPUTTIImpl::getUnrollingPreferen
unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
if (AllocaSize > MaxAlloca)
continue;
- } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
+ AS == AMDGPUAS::REGION_ADDRESS) {
LocalGEPsSeen++;
// Inhibit unroll for local memory if we have seen addressing not to
// a variable, most likely we will be unable to combine it.
Modified: llvm/trunk/lib/Target/AMDGPU/DSInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/DSInstructions.td?rev=364814&r1=364813&r2=364814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/DSInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/DSInstructions.td Mon Jul 1 10:17:45 2019
@@ -601,9 +601,9 @@ def : GCNPat <
(DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
>;
-class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst $ptr, (as_i16imm $offset), (i1 0))
+ (inst $ptr, (as_i16imm $offset), (i1 gds))
>;
multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
@@ -657,9 +657,9 @@ def : DSReadPat_D16<DS_READ_I8_D16, sext
def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;
}
-class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 gds))
>;
multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
@@ -734,75 +734,79 @@ defm : DSWritePat_mc <DS_WRITE_B64, v2i3
defm : DSWritePat_mc <DS_WRITE_B128, v4i32, "store_align16_local">;
} // End AddedComplexity = 100
-class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 gds))
>;
multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
- def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+ def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0")>;
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
- !cast<PatFrag>(frag)>;
+ !cast<PatFrag>(frag#"_local")>;
}
+
+ def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0"), 1>;
}
-class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
+ (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 gds))
>;
multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, ValueType vt, string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
- def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+ def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_local_m0")>;
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
- !cast<PatFrag>(frag)>;
+ !cast<PatFrag>(frag#"_local")>;
}
+
+ def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0"), 1>;
}
// 32-bit atomics.
-defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap_local">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_U32, i32, "atomic_load_add_local">;
-defm : DSAtomicRetPat_mc<DS_SUB_RTN_U32, i32, "atomic_load_sub_local">;
-defm : DSAtomicRetPat_mc<DS_INC_RTN_U32, i32, "atomic_inc_local">;
-defm : DSAtomicRetPat_mc<DS_DEC_RTN_U32, i32, "atomic_dec_local">;
-defm : DSAtomicRetPat_mc<DS_AND_RTN_B32, i32, "atomic_load_and_local">;
-defm : DSAtomicRetPat_mc<DS_OR_RTN_B32, i32, "atomic_load_or_local">;
-defm : DSAtomicRetPat_mc<DS_XOR_RTN_B32, i32, "atomic_load_xor_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_I32, i32, "atomic_load_min_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax_local">;
-defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax_local">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd_local">;
+defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_U32, i32, "atomic_load_add">;
+defm : DSAtomicRetPat_mc<DS_SUB_RTN_U32, i32, "atomic_load_sub">;
+defm : DSAtomicRetPat_mc<DS_INC_RTN_U32, i32, "atomic_inc">;
+defm : DSAtomicRetPat_mc<DS_DEC_RTN_U32, i32, "atomic_dec">;
+defm : DSAtomicRetPat_mc<DS_AND_RTN_B32, i32, "atomic_load_and">;
+defm : DSAtomicRetPat_mc<DS_OR_RTN_B32, i32, "atomic_load_or">;
+defm : DSAtomicRetPat_mc<DS_XOR_RTN_B32, i32, "atomic_load_xor">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_I32, i32, "atomic_load_min">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax">;
+defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax">;
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd">;
// 64-bit atomics.
-defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap_local">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_U64, i64, "atomic_load_add_local">;
-defm : DSAtomicRetPat_mc<DS_SUB_RTN_U64, i64, "atomic_load_sub_local">;
-defm : DSAtomicRetPat_mc<DS_INC_RTN_U64, i64, "atomic_inc_local">;
-defm : DSAtomicRetPat_mc<DS_DEC_RTN_U64, i64, "atomic_dec_local">;
-defm : DSAtomicRetPat_mc<DS_AND_RTN_B64, i64, "atomic_load_and_local">;
-defm : DSAtomicRetPat_mc<DS_OR_RTN_B64, i64, "atomic_load_or_local">;
-defm : DSAtomicRetPat_mc<DS_XOR_RTN_B64, i64, "atomic_load_xor_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_I64, i64, "atomic_load_min_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_I64, i64, "atomic_load_max_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_U64, i64, "atomic_load_umin_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax_local">;
+defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_U64, i64, "atomic_load_add">;
+defm : DSAtomicRetPat_mc<DS_SUB_RTN_U64, i64, "atomic_load_sub">;
+defm : DSAtomicRetPat_mc<DS_INC_RTN_U64, i64, "atomic_inc">;
+defm : DSAtomicRetPat_mc<DS_DEC_RTN_U64, i64, "atomic_dec">;
+defm : DSAtomicRetPat_mc<DS_AND_RTN_B64, i64, "atomic_load_and">;
+defm : DSAtomicRetPat_mc<DS_OR_RTN_B64, i64, "atomic_load_or">;
+defm : DSAtomicRetPat_mc<DS_XOR_RTN_B64, i64, "atomic_load_xor">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_I64, i64, "atomic_load_min">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_I64, i64, "atomic_load_max">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_U64, i64, "atomic_load_umin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax">;
-defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">;
+defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap">;
def : Pat <
(SIds_ordered_count i32:$value, i16:$offset),
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=364814&r1=364813&r2=364814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Jul 1 10:17:45 2019
@@ -1173,7 +1173,7 @@ bool SITargetLowering::canMergeStoresTo(
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize();
return (MemVT.getSizeInBits() <= MaxPrivateBits);
- } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
return (MemVT.getSizeInBits() <= 2 * 32);
}
return true;
@@ -7135,7 +7135,7 @@ SDValue SITargetLowering::LowerLOAD(SDVa
default:
llvm_unreachable("unsupported private_element_size");
}
- } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
// Use ds_read_b128 if possible.
if (Subtarget->useDS128() && Load->getAlignment() >= 16 &&
MemVT.getStoreSize() == 16)
@@ -7557,7 +7557,7 @@ SDValue SITargetLowering::LowerSTORE(SDV
default:
llvm_unreachable("unsupported private_element_size");
}
- } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
// Use ds_write_b128 if possible.
if (Subtarget->useDS128() && Store->getAlignment() >= 16 &&
VT.getStoreSize() == 16 && NumElements != 3)
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=364814&r1=364813&r2=364814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Mon Jul 1 10:17:45 2019
@@ -505,6 +505,7 @@ multiclass SIAtomicM0Glue2 <string op_na
>;
def _local_m0 : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
+ def _region_m0 : region_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
}
defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
@@ -528,6 +529,7 @@ def atomic_cmp_swap_glue : SDNode <"ISD:
>;
def atomic_cmp_swap_local_m0 : AtomicCmpSwapLocal<atomic_cmp_swap_glue>;
+def atomic_cmp_swap_region_m0 : AtomicCmpSwapRegion<atomic_cmp_swap_glue>;
def as_i1imm : SDNodeXForm<imm, [{
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=364814&r1=364813&r2=364814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Mon Jul 1 10:17:45 2019
@@ -46,7 +46,8 @@ SIMachineFunctionInfo::SIMachineFunction
ImplicitBufferPtr(false),
ImplicitArgPtr(false),
GITPtrHigh(0xffffffff),
- HighBitsOf32BitAddress(0) {
+ HighBitsOf32BitAddress(0),
+ GDSSize(0) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
@@ -159,6 +160,10 @@ SIMachineFunctionInfo::SIMachineFunction
S = A.getValueAsString();
if (!S.empty())
S.consumeInteger(0, HighBitsOf32BitAddress);
+
+ S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
+ if (!S.empty())
+ S.consumeInteger(0, GDSSize);
}
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h?rev=364814&r1=364813&r2=364814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Mon Jul 1 10:17:45 2019
@@ -262,6 +262,7 @@ private:
unsigned GITPtrHigh;
unsigned HighBitsOf32BitAddress;
+ unsigned GDSSize;
// Current recorded maximum possible occupancy.
unsigned Occupancy;
@@ -489,6 +490,10 @@ public:
return HighBitsOf32BitAddress;
}
+ unsigned getGDSSize() const {
+ return GDSSize;
+ }
+
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
Added: llvm/trunk/test/CodeGen/AMDGPU/gds-atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/gds-atomic.ll?rev=364814&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/gds-atomic.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/gds-atomic.ll Mon Jul 1 10:17:45 2019
@@ -0,0 +1,128 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+
+; FUNC-LABEL: {{^}}atomic_add_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_add_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = atomicrmw volatile add i32 addrspace(2)* %gds, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_ret_gds_const_offset:
+; GCN: s_movk_i32 m0, 0x80
+; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20 gds
+define amdgpu_kernel void @atomic_add_ret_gds_const_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #0 {
+ %gep = getelementptr i32, i32 addrspace(2)* %gds, i32 5
+ %val = atomicrmw volatile add i32 addrspace(2)* %gep, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_sub_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = atomicrmw sub i32 addrspace(2)* %gds, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_and_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = atomicrmw and i32 addrspace(2)* %gds, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_or_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = atomicrmw or i32 addrspace(2)* %gds, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_xor_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = atomicrmw xor i32 addrspace(2)* %gds, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_umin_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = atomicrmw umin i32 addrspace(2)* %gds, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_umax_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = atomicrmw umax i32 addrspace(2)* %gds, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_imin_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_imin_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = atomicrmw min i32 addrspace(2)* %gds, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_imax_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_imax_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = atomicrmw max i32 addrspace(2)* %gds, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v[[OFF]], v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_xchg_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = atomicrmw xchg i32 addrspace(2)* %gds, i32 5 acq_rel
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_ret_gds:
+; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
+; GCN-DAG: s_movk_i32 m0, 0x1000
+; GCN: ds_cmpst_rtn_b32 v{{[0-9]+}}, v[[OFF:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} gds
+define amdgpu_kernel void @atomic_cmpxchg_ret_gds(i32 addrspace(1)* %out, i32 addrspace(2)* %gds) #1 {
+ %val = cmpxchg i32 addrspace(2)* %gds, i32 0, i32 1 acquire acquire
+ %x = extractvalue { i32, i1 } %val, 0
+ store i32 %x, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind "amdgpu-gds-size"="128" }
+attributes #1 = { nounwind "amdgpu-gds-size"="4096" }
More information about the llvm-commits mailing list