[PATCH] D20728: AMDGPU: Disable AMDGPUPromoteAlloca pass for shader calling conventions.

Thu Jun 16 17:18:31 PDT 2016

bnieuwenhuizen updated this revision to Diff 61051.
bnieuwenhuizen added a comment.

Added a test and a comment, and moved the check so that only the promotion to LDS is disabled.

One example of an extra constraint on LDS usage in other stages is that the PS needs implicit
LDS space for interpolation inputs. Furthermore, some shaders have no LDS_SIZE register, and I am
not sure whether we can allocate LDS in all cases (e.g. the VS has no dedicated field, but can
we allocate using the LS LDS_SIZE even if the LS does not run?).


http://reviews.llvm.org/D20728

Files:
  lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
  test/CodeGen/AMDGPU/promote-alloca-shaders.ll

Index: test/CodeGen/AMDGPU/promote-alloca-shaders.ll
===================================================================

--- /dev/null
+++ test/CodeGen/AMDGPU/promote-alloca-shaders.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s
+
+; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
+; IR: alloca [5 x i32]
+; ASM-LABEL: {{^}}promote_alloca_shaders:
+; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only)
+
+define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
+entry:
+  %stack = alloca [5 x i32], align 4
+  %tmp0 = load i32, i32 addrspace(1)* %in, align 4
+  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
+  store i32 4, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
+  %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
+  store i32 5, i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
+  %tmp2 = load i32, i32* %arrayidx4, align 4
+  store i32 %tmp2, i32 addrspace(1)* %out, align 4
+  %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
+  %tmp3 = load i32, i32* %arrayidx5
+  %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
+  store i32 %tmp3, i32 addrspace(1)* %arrayidx6
+  ret void
+}
+
+attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
Index: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -647,6 +647,12 @@
 
   const Function &ContainingFunction = *I.getParent()->getParent();
 
+  // Don't promote the alloca to LDS for shader calling conventions as the work
+  // item ID intrinsics are not supported for these calling conventions.
+  // Furthermore not all LDS is available for some of the stages.
+  if (AMDGPU::isShader(ContainingFunction.getCallingConv()))
+    return;
+
   // FIXME: We should also try to get this value from the reqd_work_group_size
   // function attribute if it is available.
   unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20728.61051.patch
Type: text/x-patch
Size: 2546 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160617/b82ceed7/attachment.bin>