[llvm] 14a68b4 - [CGSCC] Detect devirtualization in more cases

Arthur Eubanks via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 23 11:55:59 PST 2020


Author: Arthur Eubanks
Date: 2020-11-23T11:55:20-08:00
New Revision: 14a68b4aa9732293ad7e16f105b0feb53dc8dbe2

URL: https://github.com/llvm/llvm-project/commit/14a68b4aa9732293ad7e16f105b0feb53dc8dbe2
DIFF: https://github.com/llvm/llvm-project/commit/14a68b4aa9732293ad7e16f105b0feb53dc8dbe2.diff

LOG: [CGSCC] Detect devirtualization in more cases

The devirtualization wrapper misses cases where if it wraps a pass
manager, an individual pass may devirtualize an indirect call created by
a previous pass. For example, inlining may create a new indirect call
which is devirtualized by instcombine. Currently the devirtualization
wrapper will not see that because it only checks cgscc edges at the very
beginning and end of the pass (manager) it wraps.

This fixes some tests testing this exact behavior in the legacy PM.

Instead of checking WeakTrackingVHs for CallBases at the very beginning
and end of the pass it wraps, check every time
updateCGAndAnalysisManagerForPass() is called.

check-llvm and check-clang with -abort-on-max-devirt-iterations-reached
on by default doesn't show any failures outside of tests specifically
testing it so it doesn't needlessly rerun passes more than necessary.
(The NPM -O2/3 pipeline run the inliner/function simplification pipeline
under a devirtualization repeater pass up to 4 times by default).

http://llvm-compile-time-tracker.com/?config=O3&stat=instructions&remote=aeubanks
shows that 7zip has ~1% compile time regression. I looked at it and saw
that there indeed was devirtualization happening that was not previously
caught, so now it reruns the CGSCC pipeline on some SCCs, which is WAI.

Reviewed By: asbirlea

Differential Revision: https://reviews.llvm.org/D89587

Added: 
    llvm/test/Transforms/Inline/devirtualize-5.ll
    llvm/test/Transforms/Inline/devirtualize-6.ll

Modified: 
    llvm/include/llvm/Analysis/CGSCCPassManager.h
    llvm/lib/Analysis/CGSCCPassManager.cpp
    llvm/lib/Passes/PassBuilder.cpp
    llvm/test/Transforms/Inline/devirtualize-3.ll
    llvm/test/Transforms/Inline/devirtualize.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h
index 755bc92ddccf..b2b690ddd28b 100644
--- a/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -90,6 +90,7 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PriorityWorklist.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -314,6 +315,16 @@ struct CGSCCUpdateResult {
   /// for a better technique.
   SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
       &InlinedInternalEdges;
+
+  /// Weak VHs to keep track of indirect calls for the purposes of detecting
+  /// devirtualization.
+  ///
+  /// This is a map to avoid having duplicate entries. If a Value is
+  /// deallocated, its corresponding WeakTrackingVH will be nulled out. When
+  /// checking if a Value is in the map or not, also check if the corresponding
+  /// WeakTrackingVH is null to avoid issues with a new Value sharing the same
+  /// address as a deallocated one.
+  SmallMapVector<Value *, WeakTrackingVH, 16> IndirectVHs;
 };
 
 /// The core module pass which does a post-order walk of the SCCs and
@@ -596,9 +607,6 @@ class DevirtSCCRepeatedPass
     // a pointer that we can update.
     LazyCallGraph::SCC *C = &InitialC;
 
-    // Collect value handles for all of the indirect call sites.
-    SmallVector<WeakTrackingVH, 8> CallHandles;
-
     // Struct to track the counts of direct and indirect calls in each function
     // of the SCC.
     struct CallCount {
@@ -608,35 +616,37 @@ class DevirtSCCRepeatedPass
 
     // Put value handles on all of the indirect calls and return the number of
     // direct calls for each function in the SCC.
-    auto ScanSCC = [](LazyCallGraph::SCC &C,
-                      SmallVectorImpl<WeakTrackingVH> &CallHandles) {
-      assert(CallHandles.empty() && "Must start with a clear set of handles.");
-
-      SmallDenseMap<Function *, CallCount> CallCounts;
-      CallCount CountLocal = {0, 0};
-      for (LazyCallGraph::Node &N : C) {
-        CallCount &Count =
-            CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal))
-                .first->second;
-        for (Instruction &I : instructions(N.getFunction()))
-          if (auto *CB = dyn_cast<CallBase>(&I)) {
-            if (CB->getCalledFunction()) {
-              ++Count.Direct;
-            } else {
-              ++Count.Indirect;
-              CallHandles.push_back(WeakTrackingVH(&I));
-            }
+    auto ScanSCC =
+        [](LazyCallGraph::SCC &C,
+           SmallMapVector<Value *, WeakTrackingVH, 16> &CallHandles) {
+          assert(CallHandles.empty() &&
+                 "Must start with a clear set of handles.");
+
+          SmallDenseMap<Function *, CallCount> CallCounts;
+          CallCount CountLocal = {0, 0};
+          for (LazyCallGraph::Node &N : C) {
+            CallCount &Count =
+                CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal))
+                    .first->second;
+            for (Instruction &I : instructions(N.getFunction()))
+              if (auto *CB = dyn_cast<CallBase>(&I)) {
+                if (CB->getCalledFunction()) {
+                  ++Count.Direct;
+                } else {
+                  ++Count.Indirect;
+                  CallHandles.insert({CB, WeakTrackingVH(CB)});
+                }
+              }
           }
-      }
 
-      return CallCounts;
-    };
+          return CallCounts;
+        };
 
+    UR.IndirectVHs.clear();
     // Populate the initial call handles and get the initial call counts.
-    auto CallCounts = ScanSCC(*C, CallHandles);
+    auto CallCounts = ScanSCC(*C, UR.IndirectVHs);
 
     for (int Iteration = 0;; ++Iteration) {
-
       if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
         continue;
 
@@ -659,33 +669,22 @@ class DevirtSCCRepeatedPass
       assert(C->begin() != C->end() && "Cannot have an empty SCC!");
 
       // Check whether any of the handles were devirtualized.
-      auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) {
-        if (!CallH)
-          return false;
-        auto *CB = dyn_cast<CallBase>(CallH);
-        if (!CB)
-          return false;
-
-        // If the call is still indirect, leave it alone.
-        Function *F = CB->getCalledFunction();
-        if (!F)
-          return false;
-
-        LLVM_DEBUG(dbgs() << "Found devirtualized call from "
-                          << CB->getParent()->getParent()->getName() << " to "
-                          << F->getName() << "\n");
-
-        // We now have a direct call where previously we had an indirect call,
-        // so iterate to process this devirtualization site.
-        return true;
-      };
-      bool Devirt = llvm::any_of(CallHandles, IsDevirtualizedHandle);
+      bool Devirt = llvm::any_of(UR.IndirectVHs, [](auto &P) -> bool {
+        if (P.second) {
+          CallBase *CB = cast<CallBase>(P.second);
+          if (CB->getCalledFunction()) {
+            LLVM_DEBUG(dbgs() << "Found devirtualized call: " << *CB << "\n");
+            return true;
+          }
+        }
+        return false;
+      });
 
       // Rescan to build up a new set of handles and count how many direct
       // calls remain. If we decide to iterate, this also sets up the input to
       // the next iteration.
-      CallHandles.clear();
-      auto NewCallCounts = ScanSCC(*C, CallHandles);
+      UR.IndirectVHs.clear();
+      auto NewCallCounts = ScanSCC(*C, UR.IndirectVHs);
 
       // If we haven't found an explicit devirtualization already see if we
       // have decreased the number of indirect calls and increased the number
@@ -790,7 +789,8 @@ ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>::run(Module &M,
 
   CGSCCUpdateResult UR = {
       RCWorklist, CWorklist, InvalidRefSCCSet,         InvalidSCCSet,
-      nullptr,    nullptr,   PreservedAnalyses::all(), InlinedInternalEdges};
+      nullptr,    nullptr,   PreservedAnalyses::all(), InlinedInternalEdges,
+      {}};
 
   // Request PassInstrumentation from analysis manager, will use it to run
   // instrumenting callbacks for the passes later.

diff  --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp
index 627ad03cd045..95d2ebf5f9fb 100644
--- a/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PassManagerImpl.h"
+#include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -476,9 +477,9 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass(
   // First walk the function and handle all called functions. We do this first
   // because if there is a single call edge, whether there are ref edges is
   // irrelevant.
-  for (Instruction &I : instructions(F))
-    if (auto *CB = dyn_cast<CallBase>(&I))
-      if (Function *Callee = CB->getCalledFunction())
+  for (Instruction &I : instructions(F)) {
+    if (auto *CB = dyn_cast<CallBase>(&I)) {
+      if (Function *Callee = CB->getCalledFunction()) {
         if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
           Node *CalleeN = G.lookup(*Callee);
           if (!CalleeN) {
@@ -498,6 +499,17 @@ static LazyCallGraph::SCC &updateCGAndAnalysisManagerForPass(
           else if (!E->isCall())
             PromotedRefTargets.insert(CalleeN);
         }
+      } else {
+        // We can miss devirtualization if an indirect call is created then
+        // promoted before updateCGAndAnalysisManagerForPass runs.
+        auto *Entry = UR.IndirectVHs.find(CB);
+        if (Entry == UR.IndirectVHs.end())
+          UR.IndirectVHs.insert({CB, WeakTrackingVH(CB)});
+        else if (!Entry->second)
+          Entry->second = WeakTrackingVH(CB);
+      }
+    }
+  }
 
   // Now walk all references.
   for (Instruction &I : instructions(F))

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index de8108f7d65b..9fb4ce1b90e1 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1843,7 +1843,7 @@ static Optional<int> parseDevirtPassName(StringRef Name) {
   if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
     return None;
   int Count;
-  if (Name.getAsInteger(0, Count) || Count <= 0)
+  if (Name.getAsInteger(0, Count) || Count < 0)
     return None;
   return Count;
 }

diff  --git a/llvm/test/Transforms/Inline/devirtualize-3.ll b/llvm/test/Transforms/Inline/devirtualize-3.ll
index 4165b2125dfc..987463a03766 100644
--- a/llvm/test/Transforms/Inline/devirtualize-3.ll
+++ b/llvm/test/Transforms/Inline/devirtualize-3.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -basic-aa -S -O2 < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -S -passes='default<O2>' < %s | FileCheck %s
 ; PR5009
 
 ; CHECK: define i32 @main() 

diff  --git a/llvm/test/Transforms/Inline/devirtualize-5.ll b/llvm/test/Transforms/Inline/devirtualize-5.ll
new file mode 100644
index 000000000000..ab507d1d58a1
--- /dev/null
+++ b/llvm/test/Transforms/Inline/devirtualize-5.ll
@@ -0,0 +1,22 @@
+; RUN: opt -abort-on-max-devirt-iterations-reached -passes='cgscc(devirt<1>(inline,instcombine))' -S < %s | FileCheck %s
+; RUN: opt -abort-on-max-devirt-iterations-reached -passes='default<O2>' -S < %s | FileCheck %s
+
+define i32 @i() alwaysinline {
+  ret i32 45
+}
+
+; CHECK-LABEL: define i32 @main
+; CHECK-NEXT: ret i32 45
+
+define i32 @main() {
+  %a = alloca i32 ()*
+  store i32 ()* @i, i32 ()** %a
+  %r = call i32 @call(i32 ()** %a)
+  ret i32 %r
+}
+
+define i32 @call(i32 ()** %a) alwaysinline {
+  %c = load i32 ()*, i32 ()** %a
+  %r = call i32 %c()
+  ret i32 %r
+}

diff  --git a/llvm/test/Transforms/Inline/devirtualize-6.ll b/llvm/test/Transforms/Inline/devirtualize-6.ll
new file mode 100644
index 000000000000..fb0d021548b2
--- /dev/null
+++ b/llvm/test/Transforms/Inline/devirtualize-6.ll
@@ -0,0 +1,18 @@
+; Make sure we don't detect devirtualization on inlining a function with a direct call
+; RUN: opt -abort-on-max-devirt-iterations-reached -passes='cgscc(devirt<0>(inline))' -S < %s | FileCheck %s
+
+define i32 @i() noinline {
+  ret i32 45
+}
+
+; CHECK-NOT: call i32 @call()
+
+define i32 @main() {
+  %r = call i32 @call()
+  ret i32 %r
+}
+
+define i32 @call() alwaysinline {
+  %r = call i32 @i()
+  ret i32 %r
+}

diff  --git a/llvm/test/Transforms/Inline/devirtualize.ll b/llvm/test/Transforms/Inline/devirtualize.ll
index 561bb62ae644..eaba1a8c19c5 100644
--- a/llvm/test/Transforms/Inline/devirtualize.ll
+++ b/llvm/test/Transforms/Inline/devirtualize.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -Os < %s | FileCheck %s
+; RUN: opt -S -aa-pipeline=basic-aa -passes='default<Os>' < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
 


        


More information about the llvm-commits mailing list