[llvm] c2093b8 - [AMDGPU] Add target features for GDS and GWS
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 2 01:02:20 PDT 2023
Author: Jay Foad
Date: 2023-08-02T09:02:07+01:00
New Revision: c2093b85044d87805c39267c65ac9032d5454e0e
URL: https://github.com/llvm/llvm-project/commit/c2093b85044d87805c39267c65ac9032d5454e0e
DIFF: https://github.com/llvm/llvm-project/commit/c2093b85044d87805c39267c65ac9032d5454e0e.diff
LOG: [AMDGPU] Add target features for GDS and GWS
GFX9 subtargets from GFX90A onwards lack GDS but still have GWS.
Differential Revision: https://reviews.llvm.org/D156713
Added:
llvm/test/CodeGen/AMDGPU/gds-unsupported.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/DSInstructions.td
llvm/lib/Target/AMDGPU/GCNProcessors.td
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b178623a319d04..63044b08f48300 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -899,6 +899,18 @@ def FeatureArchitectedSGPRs : SubtargetFeature<"architected-sgprs",
"Enable the architected SGPRs"
>;
+def FeatureGDS : SubtargetFeature<"gds",
+ "HasGDS",
+ "true",
+ "Has Global Data Share"
+>;
+
+def FeatureGWS : SubtargetFeature<"gws",
+ "HasGWS",
+ "true",
+ "Has Global Wave Sync"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -917,7 +929,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
- FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts
+ FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
+ FeatureGDS, FeatureGWS
]
>;
@@ -928,7 +941,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
- FeatureImageInsts
+ FeatureImageInsts, FeatureGDS, FeatureGWS
]
>;
@@ -943,7 +956,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
- FeatureUnalignedBufferAccess, FeatureImageInsts
+ FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS
]
>;
@@ -961,7 +974,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
- FeatureNegativeScratchOffsetBug
+ FeatureNegativeScratchOffsetBug, FeatureGWS
]
>;
@@ -980,7 +993,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
- FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts
+ FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
+ FeatureGDS, FeatureGWS
]
>;
@@ -999,7 +1013,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureA16, FeatureFastDenormalF32, FeatureG16,
- FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
+ FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
+ FeatureGWS
]
>;
@@ -1104,28 +1119,32 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
def FeatureISAVersion9_0_0 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureMadMixInsts,
+ [FeatureGDS,
+ FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_2 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureMadMixInsts,
+ [FeatureGDS,
+ FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_4 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureDsSrc2Insts,
+ [FeatureGDS,
+ FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureFmaMixInsts,
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_6 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [HalfRate64Ops,
+ [FeatureGDS,
+ HalfRate64Ops,
FeatureFmaMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
@@ -1139,7 +1158,8 @@ def FeatureISAVersion9_0_6 : FeatureSet<
def FeatureISAVersion9_0_8 : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
- [HalfRate64Ops,
+ [FeatureGDS,
+ HalfRate64Ops,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureAtomicBufferGlobalPkAddF16NoRtnInsts,
@@ -1148,7 +1168,8 @@ def FeatureISAVersion9_0_8 : FeatureSet<
def FeatureISAVersion9_0_9 : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureMadMixInsts,
+ [FeatureGDS,
+ FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageInsts,
@@ -1168,7 +1189,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
def FeatureISAVersion9_0_C : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureMadMixInsts,
+ [FeatureGDS,
+ FeatureMadMixInsts,
FeatureDsSrc2Insts,
FeatureExtendedImageInsts,
FeatureImageGather4D16Bug])>;
@@ -1836,6 +1858,10 @@ def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">;
def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
+def HasGDS : Predicate<"Subtarget->hasGDS()">;
+
+def HasGWS : Predicate<"Subtarget->hasGWS()">;
+
// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 825c6f0acd0f20..73820375a1e6e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2369,8 +2369,9 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) {
}
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
- if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
- !Subtarget->hasGWSSemaReleaseAll()) {
+ if (!Subtarget->hasGWS() ||
+ (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
+ !Subtarget->hasGWSSemaReleaseAll())) {
// Let this error.
SelectCode(N);
return;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6a55c6242da6c3..e9df1892396617 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1581,8 +1581,8 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) {
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
Intrinsic::ID IID) const {
- if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
- !STI.hasGWSSemaReleaseAll())
+ if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
+ !STI.hasGWSSemaReleaseAll()))
return false;
// intrinsic ID, vsrc, offset
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 85a3f763cd5a20..90a656ad50b463 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -969,8 +969,10 @@ multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
!cast<PatFrag>(frag#"_local_"#vt.Size)>;
}
- def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
- /* complexity */ 0, /* gds */ 1>;
+ let OtherPredicates = [HasGDS] in {
+ def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
+ }
}
multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
@@ -989,12 +991,14 @@ multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
!cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
}
- def : DSAtomicRetPat<inst, vt,
- !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
- /* complexity */ 0, /* gds */ 1>;
- def : DSAtomicRetPat<noRetInst, vt,
- !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
- /* complexity */ 1, /* gds */ 1>;
+ let OtherPredicates = [HasGDS] in {
+ def : DSAtomicRetPat<inst, vt,
+ !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
+ def : DSAtomicRetPat<noRetInst, vt,
+ !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+ /* complexity */ 1, /* gds */ 1>;
+ }
}
@@ -1024,10 +1028,12 @@ multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueT
/* complexity */ 1>;
}
- def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
- /* complexity */ 0, /* gds */ 1>;
- def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
- /* complexity */ 1, /* gds */ 1>;
+ let OtherPredicates = [HasGDS] in {
+ def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
+ def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+ /* complexity */ 1, /* gds */ 1>;
+ }
}
} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
@@ -1047,10 +1053,12 @@ multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
!cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
- def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
- /* complexity */ 0, /* gds */ 1>;
- def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
- /* complexity */ 1, /* gds */ 1>;
+ let OtherPredicates = [HasGDS] in {
+ def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+ /* complexity */ 0, /* gds */ 1>;
+ def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+ /* complexity */ 1, /* gds */ 1>;
+ }
}
} // End SubtargetPredicate = isGFX11Plus
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index b9c9358f88b9d7..b4ae3a7a08e4e8 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -9,11 +9,11 @@
// The code produced for "generic" is only useful for tests and cannot
// reasonably be expected to execute on any particular target.
def : ProcessorModel<"generic", NoSchedModel,
- [FeatureWavefrontSize64]
+ [FeatureWavefrontSize64, FeatureGDS, FeatureGWS]
>;
def : ProcessorModel<"generic-hsa", NoSchedModel,
- [FeatureWavefrontSize64, FeatureFlatAddressSpace]
+ [FeatureWavefrontSize64, FeatureGDS, FeatureGWS, FeatureFlatAddressSpace]
>;
//===------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ef5470df876d09..e1a4a1e76f84ae 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -180,6 +180,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasArchitectedFlatScratch = false;
bool EnableFlatScratch = false;
bool HasArchitectedSGPRs = false;
+ bool HasGDS = false;
+ bool HasGWS = false;
bool AddNoCarryInsts = false;
bool HasUnpackedD16VMem = false;
bool LDSMisalignedBug = false;
@@ -1155,6 +1157,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns true if the architected SGPRs are enabled.
bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; }
+ /// \returns true if Global Data Share is supported.
+ bool hasGDS() const { return HasGDS; }
+
+ /// \returns true if Global Wave Sync is supported.
+ bool hasGWS() const { return HasGWS; }
+
/// \returns true if the machine has merged shaders in which s0-s7 are
/// reserved by the hardware and user SGPRs start at s8
bool hasMergedShaders() const {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7bc8a24cebcd03..0857e841bf8292 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1414,7 +1414,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
return isLegalMUBUFAddressingMode(AM);
- if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
+ if (AS == AMDGPUAS::LOCAL_ADDRESS ||
+ (AS == AMDGPUAS::REGION_ADDRESS && Subtarget->hasGDS())) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
diff --git a/llvm/test/CodeGen/AMDGPU/gds-unsupported.ll b/llvm/test/CodeGen/AMDGPU/gds-unsupported.ll
new file mode 100644
index 00000000000000..7ed21baaf24391
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gds-unsupported.ll
@@ -0,0 +1,10 @@
+; RUN: not --crash llc -march=amdgcn -mcpu=gfx90a < %s 2>&1 | FileCheck %s
+
+; GDS is not supported on GFX90A+
+; CHECK: LLVM ERROR: Cannot select: {{.*}} AtomicLoadAdd
+
+define amdgpu_kernel void @atomic_add_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+ %val = atomicrmw volatile add ptr addrspace(2) %gds, i32 5 acq_rel
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
More information about the llvm-commits
mailing list