[llvm] r293310 - AMDGPU: Enable FeatureFlatForGlobal on Volcanic Islands

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 27 09:42:27 PST 2017


Author: arsenm
Date: Fri Jan 27 11:42:26 2017
New Revision: 293310

URL: http://llvm.org/viewvc/llvm-project?rev=293310&view=rev
Log:
AMDGPU: Enable FeatureFlatForGlobal on Volcanic Islands

Accomplishes what r292982 was supposed to do; that commit ended up
making only the necessary test changes.

This should be applied to the 4.0 branch.

Patch by Vedran Miletić <vedran at miletic.net>

Added:
    llvm/trunk/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
Removed:
    llvm/trunk/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=293310&r1=293309&r2=293310&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Fri Jan 27 11:42:26 2017
@@ -305,12 +305,6 @@ def FeatureEnableSIScheduler : Subtarget
   "Enable SI Machine Scheduler"
 >;
 
-def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64",
-  "NoAddr64",
-  "true",
-  "MUBUF instructions have addr64 bit"
->;
-
 // Unless +-flat-for-global is specified, turn on FlatForGlobal for
 // all OS-es on VI and newer hardware to avoid assertion failures due
 // to missing ADDR64 variants of MUBUF instructions.
@@ -320,8 +314,7 @@ def FeatureNoAddr64 : SubtargetFeature<"
 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
   "FlatForGlobal",
   "true",
-  "Force to generate flat instruction for global",
-  [FeatureNoAddr64]
+  "Force to generate flat instruction for global"
 >;
 
 // Dummy feature used to disable assembler instructions.
@@ -374,7 +367,7 @@ def FeatureVolcanicIslands : SubtargetFe
    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
    FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
    FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
-   FeatureDPP, FeatureNoAddr64
+   FeatureDPP
   ]
 >;
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=293310&r1=293309&r2=293310&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Fri Jan 27 11:42:26 2017
@@ -49,6 +49,13 @@ AMDGPUSubtarget::initializeSubtargetDepe
 
   ParseSubtargetFeatures(GPU, FullFS);
 
+  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+  // variants of MUBUF instructions.
+  if (!hasAddr64() && !FS.contains("flat-for-global")) {
+    FlatForGlobal = true;
+  }
+
   // FIXME: I don't think think Evergreen has any useful support for
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
@@ -82,7 +89,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
     FP64FP16Denormals(false),
     FPExceptions(false),
     FlatForGlobal(false),
-    NoAddr64(false),
     UnalignedScratchAccess(false),
     UnalignedBufferAccess(false),
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=293310&r1=293309&r2=293310&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Fri Jan 27 11:42:26 2017
@@ -85,7 +85,6 @@ protected:
   bool FP64FP16Denormals;
   bool FPExceptions;
   bool FlatForGlobal;
-  bool NoAddr64;
   bool UnalignedScratchAccess;
   bool UnalignedBufferAccess;
   bool EnableXNACK;

Removed: llvm/trunk/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll?rev=293309&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll (removed)
@@ -1,26 +0,0 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
-
-
-; There are no stack objects even though flat is used by default, so
-; flat_scratch_init should be disabled.
-
-; ALL-LABEL: {{^}}test:
-; HSA: .amd_kernel_code_t
-; HSA: enable_sgpr_flat_scratch_init = 0
-; HSA: .end_amd_kernel_code_t
-
-; ALL-NOT: flat_scr
-
-; HSA-DEFAULT: flat_store_dword
-; HSA-NODEFAULT: buffer_store_dword
-
-; NOHSA-DEFAULT: buffer_store_dword
-; NOHSA-NODEFAULT: flat_store_dword
-define void @test(i32 addrspace(1)* %out) {
-entry:
-  store i32 0, i32 addrspace(1)* %out
-  ret void
-}

Added: llvm/trunk/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll?rev=293310&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll Fri Jan 27 11:42:26 2017
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
+
+
+; There are no stack objects even though flat is used by default, so
+; flat_scratch_init should be disabled.
+
+; ALL-LABEL: {{^}}test:
+; HSA: .amd_kernel_code_t
+; HSA: enable_sgpr_flat_scratch_init = 0
+; HSA: .end_amd_kernel_code_t
+
+; ALL-NOT: flat_scr
+
+; HSA-DEFAULT: flat_store_dword
+; HSA-NODEFAULT: buffer_store_dword
+; HSA-NOADDR64: flat_store_dword
+
+; NOHSA-DEFAULT: buffer_store_dword
+; NOHSA-NODEFAULT: flat_store_dword
+; NOHSA-NOADDR64: flat_store_dword
+define void @test(i32 addrspace(1)* %out) {
+entry:
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}
+
+; HSA-DEFAULT: flat_store_dword
+; HSA-NODEFAULT: buffer_store_dword
+; HSA-NOADDR64: flat_store_dword
+
+; NOHSA-DEFAULT: buffer_store_dword
+; NOHSA-NODEFAULT: flat_store_dword
+; NOHSA-NOADDR64: flat_store_dword
+define void @test_addr64(i32 addrspace(1)* %out) {
+entry:
+  %out.addr = alloca i32 addrspace(1)*, align 4
+
+  store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
+  %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
+  store i32 1, i32 addrspace(1)* %arrayidx, align 4
+
+  %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
+  store i32 2, i32 addrspace(1)* %arrayidx1, align 4
+
+  ret void
+}




More information about the llvm-commits mailing list