[llvm] c2093b8 - [AMDGPU] Add target features for GDS and GWS

Wed Aug 2 01:02:20 PDT 2023

Author: Jay Foad
Date: 2023-08-02T09:02:07+01:00
New Revision: c2093b85044d87805c39267c65ac9032d5454e0e

URL: https://github.com/llvm/llvm-project/commit/c2093b85044d87805c39267c65ac9032d5454e0e
DIFF: https://github.com/llvm/llvm-project/commit/c2093b85044d87805c39267c65ac9032d5454e0e.diff

LOG: [AMDGPU] Add target features for GDS and GWS

GFX9 subtargets from GFX90A onwards lack GDS but still have GWS.

Differential Revision: https://reviews.llvm.org/D156713

Added: 
    llvm/test/CodeGen/AMDGPU/gds-unsupported.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPU.td
    llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/DSInstructions.td
    llvm/lib/Target/AMDGPU/GCNProcessors.td
    llvm/lib/Target/AMDGPU/GCNSubtarget.h
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b178623a319d04..63044b08f48300 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -899,6 +899,18 @@ def FeatureArchitectedSGPRs : SubtargetFeature<"architected-sgprs",
   "Enable the architected SGPRs"
 >;
 
+def FeatureGDS : SubtargetFeature<"gds",
+  "HasGDS",
+  "true",
+  "Has Global Data Share"
+>;
+
+def FeatureGWS : SubtargetFeature<"gws",
+  "HasGWS",
+  "true",
+  "Has Global Wave Sync"
+>;
+
 // Dummy feature used to disable assembler instructions.
 def FeatureDisable : SubtargetFeature<"",
   "FeatureDisable","true",
@@ -917,7 +929,8 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
   [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
   FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
-  FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts
+  FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
+  FeatureGDS, FeatureGWS
   ]
 >;
 
@@ -928,7 +941,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
   FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
   FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
   FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
-  FeatureImageInsts
+  FeatureImageInsts, FeatureGDS, FeatureGWS
   ]
 >;
 
@@ -943,7 +956,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
    FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
    FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
    FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
-   FeatureUnalignedBufferAccess, FeatureImageInsts
+   FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS
   ]
 >;
 
@@ -961,7 +974,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
    FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
    FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
    FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
-   FeatureNegativeScratchOffsetBug
+   FeatureNegativeScratchOffsetBug, FeatureGWS
   ]
 >;
 
@@ -980,7 +993,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
    FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
    FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
    FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
-   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts
+   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
+   FeatureGDS, FeatureGWS
   ]
 >;
 
@@ -999,7 +1013,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
    FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
    FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
    FeatureA16, FeatureFastDenormalF32, FeatureG16,
-   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
+   FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
+   FeatureGWS
   ]
 >;
 
@@ -1104,28 +1119,32 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
 
 def FeatureISAVersion9_0_0 : FeatureSet<
   !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureMadMixInsts,
+    [FeatureGDS,
+     FeatureMadMixInsts,
      FeatureDsSrc2Insts,
      FeatureExtendedImageInsts,
      FeatureImageGather4D16Bug])>;
 
 def FeatureISAVersion9_0_2 : FeatureSet<
   !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureMadMixInsts,
+    [FeatureGDS,
+     FeatureMadMixInsts,
      FeatureDsSrc2Insts,
      FeatureExtendedImageInsts,
      FeatureImageGather4D16Bug])>;
 
 def FeatureISAVersion9_0_4 : FeatureSet<
   !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureDsSrc2Insts,
+    [FeatureGDS,
+     FeatureDsSrc2Insts,
      FeatureExtendedImageInsts,
      FeatureFmaMixInsts,
      FeatureImageGather4D16Bug])>;
 
 def FeatureISAVersion9_0_6 : FeatureSet<
   !listconcat(FeatureISAVersion9_0_Common.Features,
-    [HalfRate64Ops,
+    [FeatureGDS,
+     HalfRate64Ops,
      FeatureFmaMixInsts,
      FeatureDsSrc2Insts,
      FeatureExtendedImageInsts,
@@ -1139,7 +1158,8 @@ def FeatureISAVersion9_0_6 : FeatureSet<
 
 def FeatureISAVersion9_0_8 : FeatureSet<
   !listconcat(FeatureISAVersion9_0_MI_Common.Features,
-    [HalfRate64Ops,
+    [FeatureGDS,
+     HalfRate64Ops,
      FeatureDsSrc2Insts,
      FeatureExtendedImageInsts,
      FeatureAtomicBufferGlobalPkAddF16NoRtnInsts,
@@ -1148,7 +1168,8 @@ def FeatureISAVersion9_0_8 : FeatureSet<
 
 def FeatureISAVersion9_0_9 : FeatureSet<
   !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureMadMixInsts,
+    [FeatureGDS,
+     FeatureMadMixInsts,
      FeatureDsSrc2Insts,
      FeatureExtendedImageInsts,
      FeatureImageInsts,
@@ -1168,7 +1189,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
 
 def FeatureISAVersion9_0_C : FeatureSet<
   !listconcat(FeatureISAVersion9_0_Common.Features,
-    [FeatureMadMixInsts,
+    [FeatureGDS,
+     FeatureMadMixInsts,
      FeatureDsSrc2Insts,
      FeatureExtendedImageInsts,
      FeatureImageGather4D16Bug])>;
@@ -1836,6 +1858,10 @@ def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">;
 
 def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
 
+def HasGDS : Predicate<"Subtarget->hasGDS()">;
+
+def HasGWS : Predicate<"Subtarget->hasGWS()">;
+
 // Include AMDGPU TD files
 include "SISchedule.td"
 include "GCNProcessors.td"

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 825c6f0acd0f20..73820375a1e6e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2369,8 +2369,9 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) {
 }
 
 void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
-  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
-      !Subtarget->hasGWSSemaReleaseAll()) {
+  if (!Subtarget->hasGWS() ||
+      (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
+       !Subtarget->hasGWSSemaReleaseAll())) {
     // Let this error.
     SelectCode(N);
     return;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6a55c6242da6c3..e9df1892396617 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1581,8 +1581,8 @@ static unsigned gwsIntrinToOpcode(unsigned IntrID) {
 
 bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
                                                      Intrinsic::ID IID) const {
-  if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
-      !STI.hasGWSSemaReleaseAll())
+  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
+                        !STI.hasGWSSemaReleaseAll()))
     return false;
 
   // intrinsic ID, vsrc, offset

diff  --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 85a3f763cd5a20..90a656ad50b463 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -969,8 +969,10 @@ multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
                          !cast<PatFrag>(frag#"_local_"#vt.Size)>;
   }
 
-  def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
-                       /* complexity */ 0, /* gds */ 1>;
+  let OtherPredicates = [HasGDS] in {
+    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+                         /* complexity */ 0, /* gds */ 1>;
+  }
 }
 
 multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
@@ -989,12 +991,14 @@ multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                          !cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
   }
 
-  def : DSAtomicRetPat<inst, vt,
-                       !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
-                       /* complexity */ 0, /* gds */ 1>;
-  def : DSAtomicRetPat<noRetInst, vt,
-                       !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
-                       /* complexity */ 1, /* gds */ 1>;
+  let OtherPredicates = [HasGDS] in {
+    def : DSAtomicRetPat<inst, vt,
+                         !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+                         /* complexity */ 0, /* gds */ 1>;
+    def : DSAtomicRetPat<noRetInst, vt,
+                         !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+                         /* complexity */ 1, /* gds */ 1>;
+  }
 }
 
 
@@ -1024,10 +1028,12 @@ multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueT
                                  /* complexity */ 1>;
   }
 
-  def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
-                               /* complexity */ 0, /* gds */ 1>;
-  def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
-                               /* complexity */ 1, /* gds */ 1>;
+  let OtherPredicates = [HasGDS] in {
+    def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+                                 /* complexity */ 0, /* gds */ 1>;
+    def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+                                 /* complexity */ 1, /* gds */ 1>;
+  }
 }
 } // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
 
@@ -1047,10 +1053,12 @@ multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
   def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
                         !cast<PatFrag>(frag#"_local_noret_"#vt.Size), /* complexity */ 1>;
 
-  def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
-                        /* complexity */ 0, /* gds */ 1>;
-  def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
-                        /* complexity */ 1, /* gds */ 1>;
+  let OtherPredicates = [HasGDS] in {
+    def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
+                          /* complexity */ 0, /* gds */ 1>;
+    def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
+                          /* complexity */ 1, /* gds */ 1>;
+  }
 }
 } // End SubtargetPredicate = isGFX11Plus
 

diff  --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index b9c9358f88b9d7..b4ae3a7a08e4e8 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -9,11 +9,11 @@
 // The code produced for "generic" is only useful for tests and cannot
 // reasonably be expected to execute on any particular target.
 def : ProcessorModel<"generic", NoSchedModel,
-  [FeatureWavefrontSize64]
+  [FeatureWavefrontSize64, FeatureGDS, FeatureGWS]
 >;
 
 def : ProcessorModel<"generic-hsa", NoSchedModel,
-  [FeatureWavefrontSize64, FeatureFlatAddressSpace]
+  [FeatureWavefrontSize64, FeatureGDS, FeatureGWS, FeatureFlatAddressSpace]
 >;
 
 //===------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ef5470df876d09..e1a4a1e76f84ae 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -180,6 +180,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasArchitectedFlatScratch = false;
   bool EnableFlatScratch = false;
   bool HasArchitectedSGPRs = false;
+  bool HasGDS = false;
+  bool HasGWS = false;
   bool AddNoCarryInsts = false;
   bool HasUnpackedD16VMem = false;
   bool LDSMisalignedBug = false;
@@ -1155,6 +1157,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// \returns true if the architected SGPRs are enabled.
   bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; }
 
+  /// \returns true if Global Data Share is supported.
+  bool hasGDS() const { return HasGDS; }
+
+  /// \returns true if Global Wave Sync is supported.
+  bool hasGWS() const { return HasGWS; }
+
   /// \returns true if the machine has merged shaders in which s0-s7 are
   /// reserved by the hardware and user SGPRs start at s8
   bool hasMergedShaders() const {

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7bc8a24cebcd03..0857e841bf8292 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1414,7 +1414,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
   if (AS == AMDGPUAS::PRIVATE_ADDRESS)
     return isLegalMUBUFAddressingMode(AM);
 
-  if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
+  if (AS == AMDGPUAS::LOCAL_ADDRESS ||
+      (AS == AMDGPUAS::REGION_ADDRESS && Subtarget->hasGDS())) {
     // Basic, single offset DS instructions allow a 16-bit unsigned immediate
     // field.
     // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have

diff  --git a/llvm/test/CodeGen/AMDGPU/gds-unsupported.ll b/llvm/test/CodeGen/AMDGPU/gds-unsupported.ll
new file mode 100644
index 00000000000000..7ed21baaf24391
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gds-unsupported.ll
@@ -0,0 +1,10 @@
+; RUN: not --crash llc -march=amdgcn -mcpu=gfx90a < %s 2>&1 | FileCheck %s
+
+; GDS is not supported on GFX9 subtargets from GFX90A onwards.
+; CHECK: LLVM ERROR: Cannot select: {{.*}} AtomicLoadAdd
+
+define amdgpu_kernel void @atomic_add_ret_gds(ptr addrspace(1) %out, ptr addrspace(2) %gds) #1 {
+  %val = atomicrmw volatile add ptr addrspace(2) %gds, i32 5 acq_rel
+  store i32 %val, ptr addrspace(1) %out
+  ret void
+}