[PATCH] D29147: [master + 4.0 branch] Enable FeatureFlatForGlobal on Volcanic Islands
Vedran Miletić via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 25 13:50:52 PST 2017
rivanvx created this revision.
rivanvx added a project: AMDGPU.
Herald added subscribers: nhaehnle, wdng.
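The variant of https://reviews.llvm.org/D28809 that landed unfortunately does not enable +flat-for-global where it is necessary, namely on Volcanic Islands and newer hardware, which lacks the ADDR64 variants of MUBUF instructions. This patch fixes the issue by turning FlatForGlobal on for those targets unless flat-for-global is explicitly specified in the feature string.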
https://reviews.llvm.org/D29147
Files:
lib/Target/AMDGPU/AMDGPU.td
lib/Target/AMDGPU/AMDGPUSubtarget.cpp
lib/Target/AMDGPU/AMDGPUSubtarget.h
test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
Index: test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
===================================================================
--- test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
+++ test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
@@ -1,7 +1,9 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
; There are no stack objects even though flat is used by default, so
@@ -16,11 +18,37 @@
; HSA-DEFAULT: flat_store_dword
; HSA-NODEFAULT: buffer_store_dword
+; HSA-NOADDR64: flat_store_dword
; NOHSA-DEFAULT: buffer_store_dword
; NOHSA-NODEFAULT: flat_store_dword
+; NOHSA-NOADDR64: flat_store_dword
define void @test(i32 addrspace(1)* %out) {
entry:
store i32 0, i32 addrspace(1)* %out
ret void
}
+
+; HSA-DEFAULT: flat_store_dword
+; HSA-NODEFAULT: buffer_store_dword
+; HSA-NOADDR64: flat_store_dword
+
+; NOHSA-DEFAULT: buffer_store_dword
+; NOHSA-NODEFAULT: flat_store_dword
+; NOHSA-NOADDR64: flat_store_dword
+define void @test_addr64(i32 addrspace(1)* %out) {
+entry:
+ %out.addr = alloca i32 addrspace(1)*, align 4
+
+ store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
+ %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
+ store i32 1, i32 addrspace(1)* %arrayidx, align 4
+
+ %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
+ store i32 2, i32 addrspace(1)* %arrayidx1, align 4
+
+ ret void
+}
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -203,7 +203,7 @@
}
bool hasAddr64() const {
- return (getGeneration() < VOLCANIC_ISLANDS);
+ return !NoAddr64;
}
bool hasBFE() const {
Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -49,6 +49,13 @@
ParseSubtargetFeatures(GPU, FullFS);
+ // Unless +/-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+ // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+ // variants of MUBUF instructions.
+ if (!hasAddr64() && !FS.contains("flat-for-global")) {
+ FlatForGlobal = true;
+ }
+
// FIXME: I don't think think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -320,8 +320,7 @@
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
"FlatForGlobal",
"true",
- "Force to generate flat instruction for global",
- [FeatureNoAddr64]
+ "Force to generate flat instruction for global"
>;
// Dummy feature used to disable assembler instructions.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D29147.85803.patch
Type: text/x-patch
Size: 3830 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170125/e0d27d67/attachment.bin>
More information about the llvm-commits mailing list