[llvm] c48ceaf - Revert "[AMDGPU] Set the CostPerUse value for vgpr registers."

Mon Apr 20 13:48:45 PDT 2020

Author: Piotr Sobczak
Date: 2020-04-20T22:47:31+02:00
New Revision: c48ceaf37b0b328c2cd09b8fceca2fa533aa3023

URL: https://github.com/llvm/llvm-project/commit/c48ceaf37b0b328c2cd09b8fceca2fa533aa3023
DIFF: https://github.com/llvm/llvm-project/commit/c48ceaf37b0b328c2cd09b8fceca2fa533aa3023.diff

LOG: Revert "[AMDGPU] Set the CostPerUse value for vgpr registers."

This reverts commit 728b878de689e4921ca7f864ed3036f9b2c53853.

D76417 has caused vgpr count to go up significantly in real-world
graphics content.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIRegisterInfo.td
    llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
    llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 777e6cc5d3f7..92d0cd7363e5 100644

--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -274,11 +274,6 @@ foreach Index = 0-105 in {
 
 // VGPR registers
 foreach Index = 0-255 in {
-  // Set a cost value for vgprs other than the argument registers (v0-v31).
-  // The ratio of index/allocation_granularity is taken as the cost value.
-  // Considered the allocation granularity as 4 here.
-  let CostPerUse=!if(!gt(Index, 31), !srl(Index, 2), 0) in {
-
   // There is no special encoding for low 16 bit subreg, this not a real
   // register but rather an operand for instructions preserving high 16 bits
   // of the result or reading just low 16 bits of a 32 bit VGPR.
@@ -302,7 +297,6 @@ foreach Index = 0-255 in {
     let HWEncoding{8} = 1;
     let SubRegIndices = [lo16, hi16];
   }
-  }
 }
 
 // AccVGPR registers

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
index cb4b5bd57110..29253a05e3e7 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
@@ -246,7 +246,7 @@ define amdgpu_kernel void @max_256_vgprs_spill_9x32(<32 x float> addrspace(1)* %
 ; GFX908-DAG: v_accvgpr_read_b32
 
 ; GCN:    NumVgprs: 256
-; GFX900: ScratchSize: 708
+; GFX900: ScratchSize: 644
 ; GFX908-FIXME: ScratchSize: 0
 ; GCN:    VGPRBlocks: 63
 ; GCN:    NumVGPRsForWavesPerEU: 256

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index 07a29adc6393..d9327368ac82 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -24,7 +24,7 @@
 ; OFFREG is offset system SGPR
 ; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
 ; GCN: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
-; GCN: NumVgprs: 255
+; GCN: NumVgprs: 256
 ; GCN: ScratchSize: 1536
 
 define amdgpu_vs void @main([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <4 x i32>] addrspace(4)* inreg %arg2, [34 x <8 x i32>] addrspace(4)* inreg %arg3, [16 x <4 x i32>] addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {