[llvm] r348014 - [AMDGPU] Disable SReg Global LD/ST, perf regression

Ron Lieberman via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 30 10:29:17 PST 2018


Author: ronlieb
Date: Fri Nov 30 10:29:17 2018
New Revision: 348014

URL: http://llvm.org/viewvc/llvm-project?rev=348014&view=rev
Log:
[AMDGPU] Disable SReg Global LD/ST, perf regression

Differential Revision: https://reviews.llvm.org/D55093

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp
    llvm/trunk/test/CodeGen/AMDGPU/ds_write2.ll
    llvm/trunk/test/CodeGen/AMDGPU/ds_write2st64.ll
    llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir
    llvm/trunk/test/CodeGen/AMDGPU/global-saddr.ll
    llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll
    llvm/trunk/test/CodeGen/AMDGPU/madak.ll
    llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll
    llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll
    llvm/trunk/test/CodeGen/AMDGPU/memory_clause.ll
    llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp Fri Nov 30 10:29:17 2018
@@ -43,6 +43,11 @@
 
 using namespace llvm;
 
+static cl::opt<bool> EnableGlobalSGPRAddr(
+  "amdgpu-enable-global-sgpr-addr",
+  cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
+  cl::init(false));
+
 STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
 STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
 
@@ -155,6 +160,8 @@ static bool fixupGlobalSaddr(MachineBasi
                              const GCNSubtarget &ST,
                              const SIInstrInfo *TII,
                              const SIRegisterInfo *TRI) {
+  if (!EnableGlobalSGPRAddr)
+    return false;
   bool FuncModified = false;
   MachineBasicBlock::iterator I, Next;
   for (I = MBB.begin(); I != MBB.end(); I = Next) {

Modified: llvm/trunk/test/CodeGen/AMDGPU/ds_write2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ds_write2.ll?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ds_write2.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ds_write2.ll Fri Nov 30 10:29:17 2018
@@ -1,5 +1,5 @@
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,CI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s
 
 @lds = addrspace(3) global [512 x float] undef, align 4
 @lds.f64 = addrspace(3) global [512 x double] undef, align 8

Modified: llvm/trunk/test/CodeGen/AMDGPU/ds_write2st64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ds_write2st64.ll?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ds_write2st64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ds_write2st64.ll Fri Nov 30 10:29:17 2018
@@ -1,5 +1,5 @@
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
 @lds = addrspace(3) global [512 x float] undef, align 4
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir Fri Nov 30 10:29:17 2018
@@ -1,4 +1,4 @@
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-fixup-vector-isel %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-fixup-vector-isel -amdgpu-enable-global-sgpr-addr %s -o - | FileCheck -check-prefix=GCN %s
 
 # Coverage tests for GLOBAL_* to their _SADDR equivalent.
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/global-saddr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/global-saddr.ll?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global-saddr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/global-saddr.ll Fri Nov 30 10:29:17 2018
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefix=GFX9 %s
 
 ; Test for a conv2d like sequence of loads.
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll Fri Nov 30 10:29:17 2018
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
+; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
 
 ; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr:
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/madak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/madak.ll?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/madak.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/madak.ll Fri Nov 30 10:29:17 2018
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8_9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX8_9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX8_9 %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 declare float @llvm.fabs.f32(float) nounwind readnone

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll Fri Nov 30 10:29:17 2018
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
+; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
 
 declare i32 @llvm.amdgcn.workitem.id.x()
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll Fri Nov 30 10:29:17 2018
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
-; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
+; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
 
 declare i32 @llvm.amdgcn.workitem.id.x()
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory_clause.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory_clause.ll?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory_clause.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory_clause.ll Fri Nov 30 10:29:17 2018
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}vector_clause:
 ; GCN:      global_load_dwordx4

Modified: llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll?rev=348014&r1=348013&r2=348014&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll Fri Nov 30 10:29:17 2018
@@ -1,5 +1,5 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
 declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
 declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)




More information about the llvm-commits mailing list