[llvm] r348971 - [AMDGPU] Support for "uniform-work-group-size" attribute

Wed Dec 12 12:49:17 PST 2018

Author: aakanksha555
Date: Wed Dec 12 12:49:17 2018
New Revision: 348971

URL: http://llvm.org/viewvc/llvm-project?rev=348971&view=rev
Log:
[AMDGPU] Support for "uniform-work-group-size" attribute

Updated the annotate-kernel-features pass to support the propagation of uniform-work-group attribute from the kernel to the called functions. Once this pass is run, all kernels, even the ones which initially did not have the attribute, will be able to indicate weather or not they have uniform work group size depending on the value of the attribute. 

Differential Revision: https://reviews.llvm.org/D50200


Added:
    llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
    llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
    llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
    llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
    llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
    llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-test.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp?rev=348971&r1=348970&r2=348971&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp Wed Dec 12 12:49:17 2018
@@ -46,8 +46,11 @@ namespace {
 class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
 private:
   const TargetMachine *TM = nullptr;
+  SmallVector<CallGraphNode*, 8> NodeList;
 
   bool addFeatureAttributes(Function &F);
+  bool processUniformWorkGroupAttribute();
+  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
 
 public:
   static char ID;
@@ -186,7 +189,6 @@ static bool handleAttr(Function &Parent,
     Parent.addFnAttr(Name);
     return true;
   }
-
   return false;
 }
 
@@ -213,6 +215,56 @@ static void copyFeaturesToFunction(Funct
     handleAttr(Parent, Callee, AttrName);
 }
 
+bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
+  bool Changed = false;
+
+  for (auto *Node : reverse(NodeList)) {
+    Function *Caller = Node->getFunction();
+
+    for (auto I : *Node) {
+      Function *Callee = std::get<1>(I)->getFunction();
+      if (Callee)
+        Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
+    }
+  }
+
+  return Changed;
+}
+
+bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
+       Function &Caller, Function &Callee) {
+
+  // Check for externally defined function
+  if (!Callee.hasExactDefinition()) {
+    Callee.addFnAttr("uniform-work-group-size", "false");
+    if (!Caller.hasFnAttribute("uniform-work-group-size")) 
+      Caller.addFnAttr("uniform-work-group-size", "false");
+     
+    return true;
+  }
+  // Check if the Caller has the attribute
+  if (Caller.hasFnAttribute("uniform-work-group-size")) {
+    // Check if the value of the attribute is true
+    if (Caller.getFnAttribute("uniform-work-group-size")
+        .getValueAsString().equals("true")) {
+      // Propagate the attribute to the Callee, if it does not have it
+      if (!Callee.hasFnAttribute("uniform-work-group-size")) {
+        Callee.addFnAttr("uniform-work-group-size", "true");
+        return true;
+      }
+    } else {
+      Callee.addFnAttr("uniform-work-group-size", "false");
+      return true;
+    }
+  } else {
+    // If the attribute is absent, set it as false
+    Caller.addFnAttr("uniform-work-group-size", "false");
+    Callee.addFnAttr("uniform-work-group-size", "false");
+    return true;
+  }
+  return false;
+}
+
 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
   bool HasFlat = ST.hasFlatAddressSpace();
@@ -293,15 +345,19 @@ bool AMDGPUAnnotateKernelFeatures::addFe
 }
 
 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
-  Module &M = SCC.getCallGraph().getModule();
-  Triple TT(M.getTargetTriple());
-
   bool Changed = false;
+ 
   for (CallGraphNode *I : SCC) {
-    Function *F = I->getFunction();
+    // Build a list of CallGraphNodes from most number of uses to least
+    if (I->getNumReferences())
+      NodeList.push_back(I);
+    else
+      processUniformWorkGroupAttribute();
+
+    Function *F = I->getFunction();    
+    // Add feature attributes
     if (!F || F->isDeclaration())
       continue;
-
     Changed |= addFeatureAttributes(*F);
   }
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=348971&r1=348970&r2=348971&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Wed Dec 12 12:49:17 2018
@@ -683,6 +683,9 @@ void AMDGPUPassConfig::addIRPasses() {
 }
 
 void AMDGPUPassConfig::addCodeGenPrepare() {
+  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
+    addPass(createAMDGPUAnnotateKernelFeaturesPass());
+
   if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
       EnableLowerKernelArguments)
     addPass(createAMDGPULowerKernelArgumentsPass());
@@ -770,7 +773,6 @@ bool GCNPassConfig::addPreISel() {
 
   // FIXME: We need to run a pass to propagate the attributes when calls are
   // supported.
-  addPass(createAMDGPUAnnotateKernelFeaturesPass());
 
   // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
   // regions formed by them.

Modified: llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll?rev=348971&r1=348970&r2=348971&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll Wed Dec 12 12:49:17 2018
@@ -244,52 +244,52 @@ define amdgpu_kernel void @kern_use_impl
   ret void
 }
 
-; HSA: define void @use_implicitarg_ptr() #15 {
+; HSA: define void @use_implicitarg_ptr() #16 {
 define void @use_implicitarg_ptr() #1 {
   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
   store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
   ret void
 }
 
-; HSA: define void @func_indirect_use_implicitarg_ptr() #15 {
+; HSA: define void @func_indirect_use_implicitarg_ptr() #16 {
 define void @func_indirect_use_implicitarg_ptr() #1 {
   call void @use_implicitarg_ptr()
   ret void
 }
 
-; HSA: declare void @external.func() #16
+; HSA: declare void @external.func() #17
 declare void @external.func() #3
 
-; HSA: define internal void @defined.func() #16 {
+; HSA: define internal void @defined.func() #17 {
 define internal void @defined.func() #3 {
   ret void
 }
 
-; HSA: define void @func_call_external() #16 {
+; HSA: define void @func_call_external() #17 {
 define void @func_call_external() #3 {
   call void @external.func()
   ret void
 }
 
-; HSA: define void @func_call_defined() #16 {
+; HSA: define void @func_call_defined() #17 {
 define void @func_call_defined() #3 {
   call void @defined.func()
   ret void
 }
 
-; HSA: define void @func_call_asm() #16 {
+; HSA: define void @func_call_asm() #18 {
 define void @func_call_asm() #3 {
   call void asm sideeffect "", ""() #3
   ret void
 }
 
-; HSA: define amdgpu_kernel void @kern_call_external() #17 {
+; HSA: define amdgpu_kernel void @kern_call_external() #19 {
 define amdgpu_kernel void @kern_call_external() #3 {
   call void @external.func()
   ret void
 }
 
-; HSA: define amdgpu_kernel void @func_kern_defined() #17 {
+; HSA: define amdgpu_kernel void @func_kern_defined() #19 {
 define amdgpu_kernel void @func_kern_defined() #3 {
   call void @defined.func()
   ret void
@@ -301,20 +301,22 @@ attributes #2 = { nounwind "target-cpu"=
 attributes #3 = { nounwind }
 
 ; HSA: attributes #0 = { nounwind readnone speculatable }
-; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" }
-; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" }
-; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" }
-; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" }
-; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" }
-; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" }
-; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" }
-; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" }
-; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" }
+; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" }
-; HSA: attributes #11 = { nounwind "target-cpu"="fiji" }
-; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
-; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
-; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
+; HSA: attributes #11 = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; HSA: attributes #15 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
-; HSA: attributes #16 = { nounwind }
-; HSA: attributes #17 = { nounwind "amdgpu-flat-scratch" }
+; HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" }
+; HSA: attributes #18 = { nounwind }
+; HSA: attributes #19 = { nounwind "amdgpu-flat-scratch" "uniform-work-group-size"="false" }

Added: llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll?rev=348971&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll Wed Dec 12 12:49:17 2018
@@ -0,0 +1,18 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+
+; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false
+
+; CHECK: define void @foo() #[[FOO:[0-9]+]] {
+define void @foo() #0 {
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel1() #[[FOO]] {
+define amdgpu_kernel void @kernel1() #1 {
+  call void @foo()
+  ret void
+}
+
+attributes #0 = { "uniform-work-group-size"="true" }
+
+; CHECK: attributes #[[FOO]] = { "uniform-work-group-size"="false" }

Added: llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll?rev=348971&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll Wed Dec 12 12:49:17 2018
@@ -0,0 +1,24 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+
+; Test to verify if the attribute gets propagated across nested function calls
+
+; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
+define void @func1() #0 {
+  ret void
+}
+
+; CHECK: define void @func2() #[[FUNC]] {
+define void @func2() #1 {
+  call void @func1()
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC:[0-9]+]] {
+define amdgpu_kernel void @kernel3() #2 {
+  call void @func2()
+  ret void
+}
+
+attributes #2 = { "uniform-work-group-size"="true" }
+
+; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }

Added: llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll?rev=348971&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll Wed Dec 12 12:49:17 2018
@@ -0,0 +1,25 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+
+; Two kernels with different values of the uniform-work-group-attribute call the same function
+
+; CHECK: define void @func() #[[FUNC:[0-9]+]] {
+define void @func() #0 {
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
+define amdgpu_kernel void @kernel1() #1 {
+  call void @func()
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel2() #[[FUNC]] {
+define amdgpu_kernel void @kernel2() #2 {
+  call void @func()
+  ret void
+}
+
+attributes #1 = { "uniform-work-group-size"="true" }
+
+; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="true" }

Added: llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll?rev=348971&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll Wed Dec 12 12:49:17 2018
@@ -0,0 +1,33 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+
+; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it
+; CHECK: define void @func() #[[FUNC:[0-9]+]] {
+define void @func() #0 {
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
+define amdgpu_kernel void @kernel1() #1 {
+  call void @func()
+  ret void
+}
+
+; External declaration of a function
+; CHECK: define weak_odr void @weak_func() #[[FUNC]] {
+define weak_odr void @weak_func() #0 {
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
+define amdgpu_kernel void @kernel2() #2 {
+  call void @weak_func()
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { "uniform-work-group-size"="false" }
+attributes #2 = { "uniform-work-group-size"="true" }
+
+; CHECK: attributes #[[FUNC]] = { nounwind "uniform-work-group-size"="false" }
+; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[KERNEL2]] = { "uniform-work-group-size"="true" }

Added: llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll?rev=348971&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll Wed Dec 12 12:49:17 2018
@@ -0,0 +1,37 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+
+; Test to ensure recursive functions exhibit proper behaviour
+; Test to generate fibonacci numbers
+
+; CHECK: define i32 @fib(i32 %n) #[[FIB:[0-9]+]] {
+define i32 @fib(i32 %n) #0 {
+  %cmp1 = icmp eq i32 %n, 0
+  br i1 %cmp1, label %exit, label %cont1
+
+cont1:
+  %cmp2 = icmp eq i32 %n, 1
+  br i1 %cmp2, label %exit, label %cont2
+
+cont2:
+  %nm1 = sub i32 %n, 1
+  %fibm1 = call i32 @fib(i32 %nm1)
+  %nm2 = sub i32 %n, 2
+  %fibm2 = call i32 @fib(i32 %nm2)
+  %retval = add i32 %fibm1, %fibm2
+
+  ret i32 %retval
+
+exit:
+  ret i32 1
+}
+
+; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[FIB]] {
+define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
+  %r = call i32 @fib(i32 5)
+  store i32 %r, i32 addrspace(1)* %m
+  ret void
+}
+
+attributes #1 = { "uniform-work-group-size"="true" }
+
+; CHECK: attributes #[[FIB]] = { "uniform-work-group-size"="true" }

Added: llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-test.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-test.ll?rev=348971&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-test.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/uniform-work-group-test.ll Wed Dec 12 12:49:17 2018
@@ -0,0 +1,35 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s 
+
+; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
+define void @func1() #0 {
+  ret void
+}
+
+; CHECK: define void @func4() #[[FUNC]] {
+define void @func4() #1 {
+  ret void
+}
+
+; CHECK: define void @func2() #[[FUNC]] {
+define void @func2() #1 {
+  call void @func4()
+  call void @func1()
+  ret void
+}
+
+; CHECK: define void @func3() #[[FUNC]] {
+define void @func3() #1 {
+  call void @func1()
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC]] {
+define amdgpu_kernel void @kernel3() #2 {
+  call void @func2()
+  call void @func3()
+  ret void
+}
+
+attributes #2 = { "uniform-work-group-size"="true" }
+
+; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }