[llvm-branch-commits] [llvm-branch] r293329 - Merging r293310:

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Jan 27 12:21:32 PST 2017


Author: arsenm
Date: Fri Jan 27 14:21:31 2017
New Revision: 293329

URL: http://llvm.org/viewvc/llvm-project?rev=293329&view=rev
Log:
Merging r293310:
------------------------------------------------------------------------
r293310 | arsenm | 2017-01-27 09:42:26 -0800 (Fri, 27 Jan 2017) | 8 lines

AMDGPU: Enable FeatureFlatForGlobal on Volcanic Islands

Accomplishes what r292982 was supposed to, which ended up
only really making the necessary test changes.

This should be applied to the 4.0 branch.

Patch by Vedran Miletić <vedran at miletic.net>
------------------------------------------------------------------------

Added:
    llvm/branches/release_40/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
Removed:
    llvm/branches/release_40/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
Modified:
    llvm/branches/release_40/lib/Target/AMDGPU/AMDGPU.td
    llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/branches/release_40/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
    llvm/branches/release_40/test/CodeGen/AMDGPU/v_madak_f16.ll

Modified: llvm/branches/release_40/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/lib/Target/AMDGPU/AMDGPU.td?rev=293329&r1=293328&r2=293329&view=diff
==============================================================================
--- llvm/branches/release_40/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/branches/release_40/lib/Target/AMDGPU/AMDGPU.td Fri Jan 27 14:21:31 2017
@@ -282,12 +282,6 @@ def FeatureEnableSIScheduler : Subtarget
   "Enable SI Machine Scheduler"
 >;
 
-def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64",
-  "NoAddr64",
-  "true",
-  "MUBUF instructions have addr64 bit"
->;
-
 // Unless +-flat-for-global is specified, turn on FlatForGlobal for
 // all OS-es on VI and newer hardware to avoid assertion failures due
 // to missing ADDR64 variants of MUBUF instructions.
@@ -297,8 +291,7 @@ def FeatureNoAddr64 : SubtargetFeature<"
 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
   "FlatForGlobal",
   "true",
-  "Force to generate flat instruction for global",
-  [FeatureNoAddr64]
+  "Force to generate flat instruction for global"
 >;
 
 // Dummy feature used to disable assembler instructions.
@@ -350,8 +343,7 @@ def FeatureVolcanicIslands : SubtargetFe
    FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
    FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
-   FeatureScalarStores, FeatureInv2PiInlineImm, 
-   FeatureNoAddr64
+   FeatureScalarStores, FeatureInv2PiInlineImm
   ]
 >;
 

Modified: llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=293329&r1=293328&r2=293329&view=diff
==============================================================================
--- llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Fri Jan 27 14:21:31 2017
@@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDepe
 
   ParseSubtargetFeatures(GPU, FullFS);
 
+  // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+  // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+  // variants of MUBUF instructions.
+  if (!hasAddr64() && !FS.contains("flat-for-global")) {
+    FlatForGlobal = true;
+  }
+
   // FIXME: I don't think Evergreen has any useful support for
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
@@ -83,7 +90,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
     FP64Denormals(false),
     FPExceptions(false),
     FlatForGlobal(false),
-    NoAddr64(false),
     UnalignedScratchAccess(false),
     UnalignedBufferAccess(false),
 

Modified: llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=293329&r1=293328&r2=293329&view=diff
==============================================================================
--- llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/branches/release_40/lib/Target/AMDGPU/AMDGPUSubtarget.h Fri Jan 27 14:21:31 2017
@@ -86,7 +86,6 @@ protected:
   bool FP64Denormals;
   bool FPExceptions;
   bool FlatForGlobal;
-  bool NoAddr64;
   bool UnalignedScratchAccess;
   bool UnalignedBufferAccess;
   bool EnableXNACK;

Removed: llvm/branches/release_40/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll?rev=293328&view=auto
==============================================================================
--- llvm/branches/release_40/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll (original)
+++ llvm/branches/release_40/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll (removed)
@@ -1,26 +0,0 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
-
-
-; There are no stack objects even though flat is used by default, so
-; flat_scratch_init should be disabled.
-
-; ALL-LABEL: {{^}}test:
-; HSA: .amd_kernel_code_t
-; HSA: enable_sgpr_flat_scratch_init = 0
-; HSA: .end_amd_kernel_code_t
-
-; ALL-NOT: flat_scr
-
-; HSA-DEFAULT: flat_store_dword
-; HSA-NODEFAULT: buffer_store_dword
-
-; NOHSA-DEFAULT: buffer_store_dword
-; NOHSA-NODEFAULT: flat_store_dword
-define void @test(i32 addrspace(1)* %out) {
-entry:
-  store i32 0, i32 addrspace(1)* %out
-  ret void
-}

Modified: llvm/branches/release_40/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/test/CodeGen/AMDGPU/fcanonicalize.f16.ll?rev=293329&r1=293328&r2=293329&view=diff
==============================================================================
--- llvm/branches/release_40/test/CodeGen/AMDGPU/fcanonicalize.f16.ll (original)
+++ llvm/branches/release_40/test/CodeGen/AMDGPU/fcanonicalize.f16.ll Fri Jan 27 14:21:31 2017
@@ -167,6 +167,6 @@ define void @test_fold_canonicalize_snan
 }
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" }
+attributes #1 = { nounwind "target-features"="-flat-for-global" }
+attributes #2 = { nounwind "target-features"="-flat-for-global,-fp16-denormals,-fp16-denormals" }
+attributes #3 = { nounwind "target-features"="-flat-for-global,+fp16-denormals,+fp64-denormals" }

Added: llvm/branches/release_40/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll?rev=293329&view=auto
==============================================================================
--- llvm/branches/release_40/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll (added)
+++ llvm/branches/release_40/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll Fri Jan 27 14:21:31 2017
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
+
+
+; There are no stack objects even though flat is used by default, so
+; flat_scratch_init should be disabled.
+
+; ALL-LABEL: {{^}}test:
+; HSA: .amd_kernel_code_t
+; HSA: enable_sgpr_flat_scratch_init = 0
+; HSA: .end_amd_kernel_code_t
+
+; ALL-NOT: flat_scr
+
+; HSA-DEFAULT: flat_store_dword
+; HSA-NODEFAULT: buffer_store_dword
+; HSA-NOADDR64: flat_store_dword
+
+; NOHSA-DEFAULT: buffer_store_dword
+; NOHSA-NODEFAULT: flat_store_dword
+; NOHSA-NOADDR64: flat_store_dword
+define void @test(i32 addrspace(1)* %out) {
+entry:
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}
+
+; HSA-DEFAULT: flat_store_dword
+; HSA-NODEFAULT: buffer_store_dword
+; HSA-NOADDR64: flat_store_dword
+
+; NOHSA-DEFAULT: buffer_store_dword
+; NOHSA-NODEFAULT: flat_store_dword
+; NOHSA-NOADDR64: flat_store_dword
+define void @test_addr64(i32 addrspace(1)* %out) {
+entry:
+  %out.addr = alloca i32 addrspace(1)*, align 4
+
+  store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
+  %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+
+  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
+  store i32 1, i32 addrspace(1)* %arrayidx, align 4
+
+  %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
+  store i32 2, i32 addrspace(1)* %arrayidx1, align 4
+
+  ret void
+}

Modified: llvm/branches/release_40/test/CodeGen/AMDGPU/v_madak_f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_40/test/CodeGen/AMDGPU/v_madak_f16.ll?rev=293329&r1=293328&r2=293329&view=diff
==============================================================================
--- llvm/branches/release_40/test/CodeGen/AMDGPU/v_madak_f16.ll (original)
+++ llvm/branches/release_40/test/CodeGen/AMDGPU/v_madak_f16.ll Fri Jan 27 14:21:31 2017
@@ -1,5 +1,5 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
 ; GCN-LABEL: {{^}}madak_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]




More information about the llvm-branch-commits mailing list