[llvm] r269145 - AMDGPU: Change private_element_size to 4

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue May 10 17:28:54 PDT 2016


Author: arsenm
Date: Tue May 10 19:28:54 2016
New Revision: 269145

URL: http://llvm.org/viewvc/llvm-project?rev=269145&view=rev
Log:
AMDGPU: Change private_element_size to 4

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll
    llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
    llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll
    llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll
    llvm/trunk/test/CodeGen/AMDGPU/large-work-group-registers.ll
    llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
    llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=269145&r1=269144&r2=269145&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Tue May 10 19:28:54 2016
@@ -73,7 +73,7 @@ AMDGPUSubtarget::initializeSubtargetDepe
 
   // Set defaults if needed.
   if (MaxPrivateElementSize == 0)
-    MaxPrivateElementSize = 16;
+    MaxPrivateElementSize = 4;
 
   return *this;
 }

Modified: llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll?rev=269145&r1=269144&r2=269145&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/indirect-private-64.ll Tue May 10 19:28:54 2016
@@ -1,14 +1,20 @@
-; RUN: llc -march=amdgcn -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA16 -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA4 -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA16 -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
 
 declare void @llvm.amdgcn.s.barrier() #1
 
 ; SI-LABEL: {{^}}private_access_f64_alloca:
 
-; SI-ALLOCA: buffer_store_dwordx2
-; SI-ALLOCA: buffer_load_dwordx2
+; SI-ALLOCA16: buffer_store_dwordx2
+; SI-ALLOCA16: buffer_load_dwordx2
+
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_load_dword v
+; SI-ALLOCA4: buffer_load_dword v
 
 ; SI-PROMOTE: ds_write_b64
 ; SI-PROMOTE: ds_read_b64
@@ -25,8 +31,17 @@ define void @private_access_f64_alloca(d
 
 ; SI-LABEL: {{^}}private_access_v2f64_alloca:
 
-; SI-ALLOCA: buffer_store_dwordx4
-; SI-ALLOCA: buffer_load_dwordx4
+; SI-ALLOCA16: buffer_store_dwordx4
+; SI-ALLOCA16: buffer_load_dwordx4
+
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_load_dword v
+; SI-ALLOCA4: buffer_load_dword v
+; SI-ALLOCA4: buffer_load_dword v
+; SI-ALLOCA4: buffer_load_dword v
 
 ; SI-PROMOTE: ds_write_b64
 ; SI-PROMOTE: ds_write_b64
@@ -45,8 +60,14 @@ define void @private_access_v2f64_alloca
 
 ; SI-LABEL: {{^}}private_access_i64_alloca:
 
-; SI-ALLOCA: buffer_store_dwordx2
-; SI-ALLOCA: buffer_load_dwordx2
+; SI-ALLOCA16: buffer_store_dwordx2
+; SI-ALLOCA16: buffer_load_dwordx2
+
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_load_dword v
+; SI-ALLOCA4: buffer_load_dword v
+
 
 ; SI-PROMOTE: ds_write_b64
 ; SI-PROMOTE: ds_read_b64
@@ -63,8 +84,18 @@ define void @private_access_i64_alloca(i
 
 ; SI-LABEL: {{^}}private_access_v2i64_alloca:
 
-; SI-ALLOCA: buffer_store_dwordx4
-; SI-ALLOCA: buffer_load_dwordx4
+; SI-ALLOCA16: buffer_store_dwordx4
+; SI-ALLOCA16: buffer_load_dwordx4
+
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_store_dword v
+; SI-ALLOCA4: buffer_store_dword v
+
+; SI-ALLOCA4: buffer_load_dword v
+; SI-ALLOCA4: buffer_load_dword v
+; SI-ALLOCA4: buffer_load_dword v
+; SI-ALLOCA4: buffer_load_dword v
 
 ; SI-PROMOTE: ds_write_b64
 ; SI-PROMOTE: ds_write_b64

Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll?rev=269145&r1=269144&r2=269145&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll Tue May 10 19:28:54 2016
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=SI %s
 
 ; FIXME: Broken on evergreen
 ; FIXME: For some reason the 8 and 16 vectors are being stored as

Modified: llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll?rev=269145&r1=269144&r2=269145&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/large-alloca-compute.ll Tue May 10 19:28:54 2016
@@ -7,11 +7,11 @@
 
 ; ALL-LABEL: {{^}}large_alloca_compute_shader:
 
-; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; GCN: s_mov_b32 s10, -1
-; CI: s_mov_b32 s11, 0x98f000
-; VI: s_mov_b32 s11, 0x980000
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
+; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1
+; CI-DAG: s_mov_b32 s{{[0-9]+}}, 0x88f000
+; VI-DAG: s_mov_b32 s{{[0-9]+}}, 0x880000
 
 
 ; GCNHSA: .amd_kernel_code_t

Modified: llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll?rev=269145&r1=269144&r2=269145&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll Tue May 10 19:28:54 2016
@@ -2,11 +2,11 @@
 ; RUN: llc -march=amdgcn -mcpu=carrizo < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
 
 ; ALL-LABEL: {{^}}large_alloca_pixel_shader:
-; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; GCN: s_mov_b32 s10, -1
-; CI: s_mov_b32 s11, 0x98f000
-; VI: s_mov_b32 s11, 0x980000
+; GCN-DAG: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GCN-DAG: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GCN-DAG: s_mov_b32 s10, -1
+; CI-DAG: s_mov_b32 s11, 0x88f000
+; VI-DAG: s_mov_b32 s11, 0x880000
 
 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
@@ -23,11 +23,11 @@ define amdgpu_ps void @large_alloca_pixe
 }
 
 ; ALL-LABEL: {{^}}large_alloca_pixel_shader_inreg:
-; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
-; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
-; GCN: s_mov_b32 s10, -1
-; CI: s_mov_b32 s11, 0x98f000
-; VI: s_mov_b32 s11, 0x980000
+; GCN-DAG: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GCN-DAG: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GCN-DAG: s_mov_b32 s10, -1
+; CI-DAG: s_mov_b32 s11, 0x88f000
+; VI-DAG: s_mov_b32 s11, 0x880000
 
 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen

Modified: llvm/trunk/test/CodeGen/AMDGPU/large-work-group-registers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/large-work-group-registers.ll?rev=269145&r1=269144&r2=269145&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-work-group-registers.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/large-work-group-registers.ll Tue May 10 19:28:54 2016
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck %s
 
-; CHECK: NumVgprs: 63
+; CHECK: NumVgprs: 64
 define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, <3 x i32> inreg, <3 x i32> inreg, <3 x i32>) #0 {
 main_body:
   %8 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %4, i64 0, i64 8

Modified: llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll?rev=269145&r1=269144&r2=269145&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll Tue May 10 19:28:54 2016
@@ -21,16 +21,23 @@
 
 ; GCNMESA-DAG: s_mov_b32 s16, s3
 ; GCNMESA-DAG: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
-; GCNMESA-DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCNMESA-DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
 ; GCNMESA-DAG: s_mov_b32 s14, -1
-; SIMESA-DAG: s_mov_b32 s15, 0x98f000
-; VIMESA-DAG: s_mov_b32 s15, 0x980000
+; SIMESA-DAG: s_mov_b32 s15, 0x88f000
+; VIMESA-DAG: s_mov_b32 s15, 0x880000
 
 
 ; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
 
-; GCN: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
-; GCN: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
 
 ; GCN: NumVgprs: 256
 ; GCN: ScratchSize: 1024

Modified: llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll?rev=269145&r1=269144&r2=269145&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll Tue May 10 19:28:54 2016
@@ -8,18 +8,20 @@
 ; intermediate register class copies.
 
 ; FIXME: The same register is initialized to 0 for every spill.
+; FIXME: The unused arguments are removed
 
 ; GCN-LABEL: {{^}}main:
 
-; GCN-DAG: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
-; GCN-DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
-; GCN-DAG: s_mov_b32 s14, -1
-; SI-DAG: s_mov_b32 s15, 0x98f000
-; VI-DAG: s_mov_b32 s15, 0x980000
+; GCN-DAG: s_mov_b32 s6, s12
+; GCN-DAG: s_mov_b32 s16, SCRATCH_RSRC_DWORD0
+; GCN-DAG: s_mov_b32 s17, SCRATCH_RSRC_DWORD1
+; GCN-DAG: s_mov_b32 s18, -1
+; SI-DAG: s_mov_b32 s19, 0x88f000
+; VI-DAG: s_mov_b32 s19, 0x880000
 
-; s12 is offset user SGPR
-; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Spill
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Reload
+; s6 is offset system SGPR
+; GCN: buffer_store_dword {{v[0-9]+}}, off, s[16:19], s6 offset:{{[0-9]+}} ; 16-byte Folded Spill
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[16:19], s6 offset:{{[0-9]+}} ; 16-byte Folded Reload
 
 ; GCN: NumVgprs: 256
 ; GCN: ScratchSize: 1024




More information about the llvm-commits mailing list