[PATCH] D29147: [master + 4.0 branch] Enable FeatureFlatForGlobal on Volcanic Islands

Vedran Miletić via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 25 13:50:52 PST 2017


rivanvx created this revision.
rivanvx added a project: AMDGPU.
Herald added subscribers: nhaehnle, wdng.

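Unfortunately, the variant of https://reviews.llvm.org/D28809 that landed does not enable +flat-for-global where necessary. This patch fixes the issue: unless flat-for-global is explicitly specified in the feature string, FlatForGlobal is now turned on for subtargets without ADDR64 (Volcanic Islands and newer), and FeatureFlatForGlobal no longer implies FeatureNoAddr64.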


https://reviews.llvm.org/D29147

Files:
  lib/Target/AMDGPU/AMDGPU.td
  lib/Target/AMDGPU/AMDGPUSubtarget.cpp
  lib/Target/AMDGPU/AMDGPUSubtarget.h
  test/CodeGen/AMDGPU/ci-use-flat-for-global.ll


Index: test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
===================================================================
--- test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
+++ test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
@@ -1,7 +1,9 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
 ; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
 ; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
 
 
 ; There are no stack objects even though flat is used by default, so
@@ -16,11 +18,37 @@
 
 ; HSA-DEFAULT: flat_store_dword
 ; HSA-NODEFAULT: buffer_store_dword
+; HSA-NOADDR64: flat_store_dword
 
 ; NOHSA-DEFAULT: buffer_store_dword
 ; NOHSA-NODEFAULT: flat_store_dword
+; NOHSA-NOADDR64: flat_store_dword
 define void @test(i32 addrspace(1)* %out) {
 entry:
   store i32 0, i32 addrspace(1)* %out
   ret void
 }
+
+; HSA-DEFAULT: flat_store_dword
+; HSA-NODEFAULT: buffer_store_dword
+; HSA-NOADDR64: flat_store_dword
+
+; NOHSA-DEFAULT: buffer_store_dword
+; NOHSA-NODEFAULT: flat_store_dword
+; NOHSA-NOADDR64: flat_store_dword
+define void @test_addr64(i32 addrspace(1)* %out) {
+entry:
+  %out.addr = alloca i32 addrspace(1)*, align 4
+
+  store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
+  %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
+  store i32 1, i32 addrspace(1)* %arrayidx, align 4
+
+  %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
+  store i32 2, i32 addrspace(1)* %arrayidx1, align 4
+
+  ret void
+}
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -203,7 +203,7 @@
   }
 
   bool hasAddr64() const {
-    return (getGeneration() < VOLCANIC_ISLANDS);
+    return !NoAddr64;
   }
 
   bool hasBFE() const {
Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -49,6 +49,13 @@
 
   ParseSubtargetFeatures(GPU, FullFS);
 
+  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+  // variants of MUBUF instructions.
+  if (!hasAddr64() && !FS.contains("flat-for-global")) {
+    FlatForGlobal = true;
+  }
+
   // FIXME: I don't think think Evergreen has any useful support for
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -320,8 +320,7 @@
 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
   "FlatForGlobal",
   "true",
-  "Force to generate flat instruction for global",
-  [FeatureNoAddr64]
+  "Force to generate flat instruction for global"
 >;
 
 // Dummy feature used to disable assembler instructions.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D29147.85803.patch
Type: text/x-patch
Size: 3830 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170125/e0d27d67/attachment.bin>


More information about the llvm-commits mailing list