[llvm] e06d707 - [AMDGPU] Fixed function traversal in attribute propagation

Wed Mar 25 18:53:19 PDT 2020

Author: Stanislav Mekhanoshin
Date: 2020-03-25T18:47:09-07:00
New Revision: e06d707aa2ae4582b26f8b4839f07574cc67deee

URL: https://github.com/llvm/llvm-project/commit/e06d707aa2ae4582b26f8b4839f07574cc67deee
DIFF: https://github.com/llvm/llvm-project/commit/e06d707aa2ae4582b26f8b4839f07574cc67deee.diff

LOG: [AMDGPU] Fixed function traversal in attribute propagation

The AMDGPUPropagateAttributes pass was skipping some functions
when cloning. Functions were added to the root set and then skipped
on the next iteration because they were already in the root set,
even though they were meant to be processed with different features.

Differential Revision: https://reviews.llvm.org/D76815

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
    llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
index 154a4eae8ce2..0ad4eebcf3f9 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -192,12 +192,13 @@ bool AMDGPUPropagateAttributes::process() {
     NewRoots.clear();
 
     for (auto &F : M.functions()) {
-      if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F))
+      if (F.isDeclaration())
         continue;
 
       const FeatureBitset &CalleeBits =
         TM->getSubtargetImpl(F)->getFeatureBits();
       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
+      SmallSet<CallBase *, 32> Visited;
 
       for (User *U : F.users()) {
         Instruction *I = dyn_cast<Instruction>(U);
@@ -207,16 +208,17 @@ bool AMDGPUPropagateAttributes::process() {
         if (!CI)
           continue;
         Function *Caller = CI->getCaller();
-        if (!Caller)
+        if (!Caller || !Visited.insert(CI).second)
           continue;
-        if (!Roots.count(Caller))
+        if (!Roots.count(Caller) && !NewRoots.count(Caller))
           continue;
 
         const FeatureBitset &CallerBits =
           TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
 
         if (CallerBits == (CalleeBits  & TargetFeatures)) {
-          NewRoots.insert(&F);
+          if (!Roots.count(&F))
+            NewRoots.insert(&F);
           continue;
         }
 
@@ -258,6 +260,9 @@ bool AMDGPUPropagateAttributes::process() {
       F->eraseFromParent();
   }
 
+  Roots.clear();
+  Clones.clear();
+
   return Changed;
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
index b55a87de4be2..580fb31bc202 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
@@ -7,17 +7,54 @@
 ; OPT-EXT: define void @foo3() local_unnamed_addr #1
 ; OPT-INT: define internal fastcc void @foo3.2() unnamed_addr #1
 ; OPT-EXT: define void @foo2() local_unnamed_addr #1
-; OPT-INT: define internal fastcc void @foo2() unnamed_addr #1
+; OPT-INT: define internal fastcc void @foo2.3() unnamed_addr #1
 ; OPT-EXT: define void @foo1() local_unnamed_addr #1
+; OPT-EXT:  tail call void @foo4()
+; OPT-EXT:  tail call void @foo3()
+; OPT-EXT:  tail call void @foo2()
+; OPT-EXT:  tail call void @foo2()
+; OPT-EXT:  tail call void @foo1()
+; OPT-EXT:  tail call fastcc void @0()
 ; OPT-INT: define internal fastcc void @foo1.1() unnamed_addr #1
+; OPT-INT:  tail call void @foo4()
+; OPT-INT:  tail call fastcc void @foo3.2()
+; OPT-INT:  tail call fastcc void @foo2.3()
+; OPT-INT:  tail call fastcc void @foo2.3()
+; OPT-INT:  tail call fastcc void @foo1.1()
+; OPT-INT:  tail call fastcc void @0()
+; OPT:      ret void
 ; OPT: define amdgpu_kernel void @kernel1() local_unnamed_addr #2
+; OPT-EXT:  tail call fastcc void @foo1.1()
+; OPT-INT:  tail call fastcc void @foo1()
+; OPT:      ret void
 ; OPT: define amdgpu_kernel void @kernel2() local_unnamed_addr #3
+; OPT-EXT:  tail call void @foo2()
+; OPT-INT:  tail call fastcc void @foo2.3()
+; OPT:      ret void
 ; OPT: define amdgpu_kernel void @kernel3() local_unnamed_addr #3
+; OPT-EXT:  tail call void @foo1()
+; OPT-INT:  tail call fastcc void @foo1.1()
+; OPT:      ret void
 ; OPT-EXT: define internal fastcc void @foo1.1() unnamed_addr #4
+; OPT-EXT:  tail call void @foo4()
+; OPT-EXT:  tail call fastcc void @foo3.2()
+; OPT-EXT:  tail call fastcc void @foo2.3()
+; OPT-EXT:  tail call fastcc void @foo2.3()
+; OPT-EXT:  tail call fastcc void @foo1.1()
+; OPT-EXT:  tail call fastcc void @1()
 ; OPT-INT: define internal fastcc void @foo1() unnamed_addr #4
+; OPT-INT:  tail call void @foo4()
+; OPT-INT:  tail call fastcc void @foo3()
+; OPT-INT:  tail call fastcc void @foo2()
+; OPT-INT:  tail call fastcc void @foo2()
+; OPT-INT:  tail call fastcc void @foo1()
+; OPT-INT:  tail call fastcc void @1()
+; OPT:      ret void
 ; OPT: define internal fastcc void @1() unnamed_addr #4
 ; OPT-EXT: define internal fastcc void @foo3.2() unnamed_addr #4
 ; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4
+; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4
+; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4
 ; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
 ; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" }
 ; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" }