[llvm] ccc6e78 - AMDGPU: Directly annotate functions if they have calls

Thu Mar 12 16:12:43 PDT 2020

Author: Matt Arsenault
Date: 2020-03-12T19:10:59-04:00
New Revision: ccc6e780c8fa769fc503f193d27a1ef356f6355d

URL: https://github.com/llvm/llvm-project/commit/ccc6e780c8fa769fc503f193d27a1ef356f6355d
DIFF: https://github.com/llvm/llvm-project/commit/ccc6e780c8fa769fc503f193d27a1ef356f6355d.diff

LOG: AMDGPU: Directly annotate functions if they have calls

Currently we infer whether the flat-scratch-init kernel input should
be enabled based on calls. Move this handling, so we can decide if the
full set of ABI inputs is needed in kernels. Ideally we would have an
analysis of some sort, rather than the function attributes.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
    llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
    llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
    llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
    llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
    llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
    llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
    llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 66801b7af542..ab55ee45a6b1 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -277,7 +277,6 @@ bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
 
 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
-  bool HasFlat = ST.hasFlatAddressSpace();
   bool HasApertureRegs = ST.hasApertureRegs();
   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
 
@@ -347,8 +346,8 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
   // TODO: We could refine this to captured pointers that could possibly be
   // accessed by flat instructions. For now this is mostly a poor way of
   // estimating whether there are calls before argument lowering.
-  if (HasFlat && !IsFunc && HaveCall) {
-    F.addFnAttr("amdgpu-flat-scratch");
+  if (!IsFunc && HaveCall) {
+    F.addFnAttr("amdgpu-calls");
     Changed = true;
   }
 

diff  --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 0c67b1467a5d..300b288405bb 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -54,6 +54,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
 
   Occupancy = ST.computeOccupancy(MF, getLDSSize());
   CallingConv::ID CC = F.getCallingConv();
+  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+
+  // FIXME: Should have analysis or something rather than attribute to detect
+  // calls.
+  const bool HasCalls = FrameInfo.hasCalls() || F.hasFnAttribute("amdgpu-calls");
 
   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
     if (!F.arg_empty())
@@ -107,7 +112,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   if (F.hasFnAttribute("amdgpu-work-item-id-z"))
     WorkItemIDZ = true;
 
-  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   bool HasStackObjects = FrameInfo.hasStackObjects();
 
   if (isEntryFunction()) {
@@ -158,7 +162,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     };
     // TODO: This could be refined a lot. The attribute is a poor way of
     // detecting calls that may require it before argument lowering.
-    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
+    if (HasCalls || hasNonSpillStackObjects())
       FlatScratchInit = true;
   }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index ca6739f5d37e..82ba28526d35 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -319,4 +319,4 @@ attributes #3 = { nounwind }
 ; HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" }
 ; HSA: attributes #18 = { nounwind }
-; HSA: attributes #19 = { nounwind "amdgpu-flat-scratch" "uniform-work-group-size"="false" }
+; HSA: attributes #19 = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" }

diff  --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
index 51002e8aee14..022ff8547ef5 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
 
 ; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false
 
@@ -7,7 +7,7 @@ define void @foo() #0 {
   ret void
 }
 
-; CHECK: define amdgpu_kernel void @kernel1() #[[FOO]] {
+; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
 define amdgpu_kernel void @kernel1() #1 {
   call void @foo()
   ret void
@@ -16,3 +16,4 @@ define amdgpu_kernel void @kernel1() #1 {
 attributes #0 = { "uniform-work-group-size"="true" }
 
 ; CHECK: attributes #[[FOO]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }

diff  --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
index 837611480436..321b8cb086a0 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
 
 ; Test to verify if the attribute gets propagated across nested function calls
 
@@ -13,7 +13,7 @@ define void @func2() #1 {
   ret void
 }
 
-; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC:[0-9]+]] {
+; CHECK: define amdgpu_kernel void @kernel3() #[[KERNEL:[0-9]+]] {
 define amdgpu_kernel void @kernel3() #2 {
   call void @func2()
   ret void
@@ -22,3 +22,4 @@ define amdgpu_kernel void @kernel3() #2 {
 attributes #2 = { "uniform-work-group-size"="true" }
 
 ; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[KERNEL]] = { "amdgpu-calls" "uniform-work-group-size"="true" }

diff  --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
index 4a332f66321a..78136d90d6f1 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
 
 ; Two kernels with different values of the uniform-work-group-attribute call the same function
 
@@ -13,7 +13,7 @@ define amdgpu_kernel void @kernel1() #1 {
   ret void
 }
 
-; CHECK: define amdgpu_kernel void @kernel2() #[[FUNC]] {
+; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
 define amdgpu_kernel void @kernel2() #2 {
   call void @func()
   ret void
@@ -22,4 +22,5 @@ define amdgpu_kernel void @kernel2() #2 {
 attributes #1 = { "uniform-work-group-size"="true" }
 
 ; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[KERNEL2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }

diff  --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
index 15131a4e31e3..4214587842fa 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
 
 ; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it
 ; CHECK: define void @func() #[[FUNC:[0-9]+]] {
@@ -29,5 +29,5 @@ attributes #1 = { "uniform-work-group-size"="false" }
 attributes #2 = { "uniform-work-group-size"="true" }
 
 ; CHECK: attributes #[[FUNC]] = { nounwind "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL2]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[KERNEL2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }

diff  --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
index 9d07a887aa19..0b6053fb4c4a 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
 
 ; Test to ensure recursive functions exhibit proper behaviour
 ; Test to generate fibonacci numbers
@@ -25,7 +25,7 @@ exit:
   ret i32 1
 }
 
-; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[FIB]] {
+; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[KERNEL:[0-9]+]] {
 define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
   %r = call i32 @fib(i32 5)
   store i32 %r, i32 addrspace(1)* %m
@@ -35,3 +35,4 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
 attributes #1 = { "uniform-work-group-size"="true" }
 
 ; CHECK: attributes #[[FIB]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[KERNEL]] = { "amdgpu-calls" "uniform-work-group-size"="true" }

diff  --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
index 0a3cae8c4676..a9a2f12ef1f4 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -32,4 +32,4 @@ define amdgpu_kernel void @kernel3() #0 {
 
 attributes #0 = { "uniform-work-group-size"="false" }
 
-; CHECK: attributes #[[FUNC]] = { "amdgpu-flat-scratch" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[FUNC]] = { "amdgpu-calls" "uniform-work-group-size"="false" }