[llvm] r289262 - AMDGPU/SI: Don't reserve XNACK when it's disabled

Marek Olsak via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 9 11:49:55 PST 2016


Author: mareko
Date: Fri Dec  9 13:49:54 2016
New Revision: 289262

URL: http://llvm.org/viewvc/llvm-project?rev=289262&view=rev
Log:
AMDGPU/SI: Don't reserve XNACK when it's disabled

Summary:
This frees 2 additional scalar registers.

These are results from all of my 3 patches combined:

  Polaris:
    Spilled SGPRs: 2231 -> 1517 (-32.00 %)

  Tonga:
    Spilled SGPRs: 3829 -> 2608 (-31.89 %)
    Spilled VGPRs: 100 -> 84 (-16.00 %)

  Tonga even spills SGPRs via VGPRs to scratch. That's a compute shader
  limited to 64 VGPRs.

Reviewers: tstellarAMD

Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye

Differential Revision: https://reviews.llvm.org/D27151

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
    llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
    llvm/trunk/test/CodeGen/AMDGPU/flat-scratch-reg.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=289262&r1=289261&r2=289262&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Fri Dec  9 13:49:54 2016
@@ -73,6 +73,13 @@ def FeatureUnalignedScratchAccess : Subt
   "Support unaligned scratch loads and stores"
 >;
 
+// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
+// XNACK. The current default kernel driver setting is:
+// - graphics ring: XNACK disabled
+// - compute ring: XNACK enabled
+//
+// If XNACK is enabled, the VMEM latency can be worse.
+// If XNACK is disabled, the 2 SGPRs can be used for general purposes.
 def FeatureXNACK : SubtargetFeature<"xnack",
   "EnableXNACK",
   "true",

Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=289262&r1=289261&r2=289262&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Fri Dec  9 13:49:54 2016
@@ -1188,7 +1188,7 @@ unsigned SIRegisterInfo::getNumReservedS
       return 4; // FLAT_SCRATCH, VCC (in that order)
   }
 
-  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (ST.isXNACKEnabled())
     return 4; // XNACK, VCC (in that order)
 
   return 2; // VCC.

Modified: llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll?rev=289262&r1=289261&r2=289262&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll Fri Dec  9 13:49:54 2016
@@ -9,10 +9,10 @@
 ; FIXME: Should be ablo to skip this copying of the private segment
 ; buffer because all the SGPR spills are to VGPRs.
 
-; ALL: s_mov_b64 s[6:7], s[2:3]
-; ALL: s_mov_b64 s[4:5], s[0:1]
+; ALL: s_mov_b64 s[10:11], s[2:3]
+; ALL: s_mov_b64 s[8:9], s[0:1]
 ; ALL: SGPRBlocks: 1
-; ALL: NumSGPRsForWavesPerEU: 12
+; ALL: NumSGPRsForWavesPerEU: 14
 define void @max_12_sgprs(i32 addrspace(1)* %out1,
 
                           i32 addrspace(1)* %out2,
@@ -46,9 +46,9 @@ define void @max_12_sgprs(i32 addrspace(
 ; TOSGPR: SGPRBlocks: 1
 ; TOSGPR: NumSGPRsForWavesPerEU: 16
 
-; TOSMEM: s_mov_b64 s[6:7], s[2:3]
-; TOSMEM: s_mov_b32 s9, s13
-; TOSMEM: s_mov_b64 s[4:5], s[0:1]
+; TOSMEM: s_mov_b64 s[10:11], s[2:3]
+; TOSMEM: s_mov_b64 s[8:9], s[0:1]
+; TOSMEM: s_mov_b32 s7, s13
 
 ; TOSMEM: SGPRBlocks: 1
 ; TOSMEM: NumSGPRsForWavesPerEU: 16

Modified: llvm/trunk/test/CodeGen/AMDGPU/flat-scratch-reg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/flat-scratch-reg.ll?rev=289262&r1=289261&r2=289262&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/flat-scratch-reg.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/flat-scratch-reg.ll Fri Dec  9 13:49:54 2016
@@ -1,5 +1,9 @@
 ; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
+
+; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK  -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK  -check-prefix=GCN %s
+
 ; RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK  -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK  -check-prefix=GCN %s
 




More information about the llvm-commits mailing list