[PATCH] D92483: AMDGPU - Use MUBUF instructions for global address space access

praveen velliengiri via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 2 07:07:52 PST 2020


pvellien created this revision.
pvellien added reviewers: rampitec, arsenm, sameerds, t-tye, scott.linder.
Herald added subscribers: llvm-commits, kerbowa, hiraditya, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
Herald added a project: LLVM.
pvellien requested review of this revision.
Herald added a subscriber: wdng.

Currently, the compiler crashes during instruction selection of global loads/stores on gfx600 because that target lacks FLAT instructions. This patch fixes the crash by selecting MUBUF instructions for global loads/stores on gfx600.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D92483

Files:
  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
  llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
  llvm/test/CodeGen/AMDGPU/si-global-buffer.ll


Index: llvm/test/CodeGen/AMDGPU/si-global-buffer.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/si-global-buffer.ll
@@ -0,0 +1,18 @@
+; RUN: llc --mtriple=amdgcn-amd-amdhsa --mcpu=gfx600 -mattr=+flat-for-global -verify-machineinstrs <%s | FileCheck -check-prefix=SI %s
+; RUN: llc --mtriple=amdgcn  --mcpu=gfx600 -mattr=+flat-for-global -verify-machineinstrs <%s | FileCheck -check-prefix=SI %s
+
+define void @test(i32 addrspace(1)* %out){
+  ; SI-LABEL: test:
+  ; SI:       ; %bb.0:
+  ; SI-NEXT:  s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+  ; SI-NEXT:  s_mov_b32 s6, 0
+  ; SI-NEXT:  s_mov_b32 s7, {{.*}}
+  ; SI-NEXT:  s_mov_b32 s4, s6
+  ; SI-NEXT:  s_mov_b32 s5, s6
+  ; SI-NEXT:  v_mov_b32_e32 v2, 0
+  ; SI-NEXT:  buffer_store_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
+  ; SI-NEXT:  s_waitcnt vmcnt(0) expcnt(0)
+  ; SI-NEXT:  s_setpc_b64 s[30:31]
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}
Index: llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -533,10 +533,7 @@
 define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 {
 ; HSA-LABEL: @kern_lds_ptr_si(
 ; HSA-NEXT:    [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-; HSA-NEXT:    [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT:    [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)*
-; HSA-NEXT:    [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4
+; HSA-NEXT:    store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4
 ; HSA-NEXT:    ret void
 ;
 ; MESA-LABEL: @kern_lds_ptr_si(
Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -118,6 +118,13 @@
     FlatForGlobal = true;
   }
 
+  // Use MUBUF instructions for global address space access in GFX60x
+  if (getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+    FlatForGlobal = false;
+    if (FS.contains("+flat-for-global"))
+       errs() << "GFX6 do not support Flat instructions for Global Address Space\n";
+  }
+
   // Set defaults if needed.
   if (MaxPrivateElementSize == 0)
     MaxPrivateElementSize = 4;
@@ -182,7 +189,9 @@
     AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS),
     AMDGPUSubtarget(TT),
     TargetTriple(TT),
-    Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS),
+    Gen(!GPU.contains("generic") ? SOUTHERN_ISLANDS :
+        (TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS)),
+
     InstrItins(getInstrItineraryForCPU(GPU)),
     LDSBankCount(0),
     MaxPrivateElementSize(0),


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D92483.308957.patch
Type: text/x-patch
Size: 3203 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201202/47c53121/attachment.bin>


More information about the llvm-commits mailing list