[llvm] 87a85f3 - [Attributor] Use internalized version of non-exact functions

Luofan Chen via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 15 05:24:23 PDT 2020


Author: Luofan Chen
Date: 2020-08-15T20:23:38+08:00
New Revision: 87a85f3d57f55f5652ec44f77816c7c9457545fa

URL: https://github.com/llvm/llvm-project/commit/87a85f3d57f55f5652ec44f77816c7c9457545fa
DIFF: https://github.com/llvm/llvm-project/commit/87a85f3d57f55f5652ec44f77816c7c9457545fa.diff

LOG: [Attributor] Use internalized version of non-exact functions

This patch internalizes non-exact functions and replaces their uses
with the internalized version. Doing this enables the analysis of
non-exact functions.

We can do this because non-exact functions with the same name whose
linkage is `linkonce_odr` or `weak_odr` are required to have the same
semantics, so we can safely internalize them and replace their uses
(the result of any other version of the function would be the same).
Note that not all functions can be internalized, e.g., functions with
`linkonce` or `weak` linkage cannot.

For now, when enabled on the command line, we internalize all functions
that meet the requirements without calculating the cost of such
internalization.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D84167

Added: 
    llvm/test/Transforms/Attributor/internalize.ll

Modified: 
    llvm/lib/Transforms/IPO/Attributor.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 764d71b98e06..6599ff6d6246 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -21,8 +21,10 @@
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/NoFolder.h"
 #include "llvm/IR/Verifier.h"
@@ -34,6 +36,7 @@
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 
 #include <cassert>
@@ -88,6 +91,12 @@ static cl::opt<bool>
                                   "wrappers for non-exact definitions."),
                          cl::init(false));
 
+static cl::opt<bool>
+    AllowDeepWrapper("attributor-allow-deep-wrappers", cl::Hidden,
+                     cl::desc("Allow the Attributor to use IP information "
+                              "derived from non-exact functions via cloning"),
+                     cl::init(false));
+
 static cl::list<std::string>
     SeedAllowList("attributor-seed-allow-list", cl::Hidden,
                   cl::desc("Comma seperated list of attrbute names that are "
@@ -1413,6 +1422,52 @@ static void createShallowWrapper(Function &F) {
   NumFnShallowWrapperCreated++;
 }
 
+/// Clone \p F into a new private-linkage function named "<F>.internalized",
+/// redirect every use of \p F to the clone, and return the clone so that it
+/// can be analysed like any other local definition.
+///
+/// Only non-exactly defined functions that have `linkonce_odr` or `weak_odr`
+/// linkage can be internalized because these linkages guarantee that other
+/// definitions with the same name have the same semantics as this one.
+/// The original \p F is not erased here; only its uses are rewritten.
+static Function *internalizeFunction(Function &F) {
+  assert(AllowDeepWrapper && "Cannot create a copy if not allowed.");
+  assert(!F.isDeclaration() && !F.hasExactDefinition() &&
+         !GlobalValue::isInterposableLinkage(F.getLinkage()) &&
+         "Trying to internalize function which cannot be internalized.");
+
+  Module &M = *F.getParent();
+  FunctionType *FnTy = F.getFunctionType();
+
+  // Create the empty clone; it is only inserted into the module further below.
+  Function *Copied =
+      Function::Create(FnTy, GlobalValue::PrivateLinkage, F.getAddressSpace(),
+                       F.getName() + ".internalized");
+  ValueToValueMapTy VMap;
+  auto *NewFArgIt = Copied->arg_begin();
+  for (auto &Arg : F.args()) {
+    auto ArgName = Arg.getName();
+    NewFArgIt->setName(ArgName);
+    VMap[&Arg] = &(*NewFArgIt++);
+  }
+  SmallVector<ReturnInst *, 8> Returns;
+
+  // Clone the body of F into Copied; VMap maps F's arguments to Copied's.
+  CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns);
+
+  // Copy metadata. NOTE(review): CloneFunctionInto may already copy it; verify.
+  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+  F.getAllMetadata(MDs);
+  for (auto MDIt : MDs)
+    Copied->addMetadata(MDIt.first, *MDIt.second);
+
+  M.getFunctionList().insert(F.getIterator(), Copied);
+  F.replaceAllUsesWith(Copied);
+  Copied->setDSOLocal(true);
+
+  return Copied;
+}
+
 bool Attributor::isValidFunctionSignatureRewrite(
     Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
 
@@ -2145,6 +2200,27 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
       if (!A.isFunctionIPOAmendable(*F))
         createShallowWrapper(*F);
 
+  // Internalize non-exact functions
+  // TODO: for now we eagerly internalize functions without calculating the
+  //       cost, we need a cost interface to determine whether internalizing
+  //       a function is "beneficial"
+  if (AllowDeepWrapper) {
+    for (Function *F : Functions)
+      if (!F->isDeclaration() && !F->isDefinitionExact() && F->getNumUses() &&
+          !GlobalValue::isInterposableLinkage(F->getLinkage())) {
+        Function *NewF = internalizeFunction(*F);
+        Functions.insert(NewF);
+
+        // Update call graph
+        CGUpdater.registerOutlinedFunction(*NewF);
+        for (const Use &U : NewF->uses())
+          if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) {
+            auto *CallerF = CB->getCaller();
+            CGUpdater.reanalyzeFunction(*CallerF);
+          }
+      }
+  }
+
   for (Function *F : Functions) {
     if (F->hasExactDefinition())
       NumFnWithExactDefinition++;

diff  --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll
new file mode 100644
index 000000000000..7773bfbb2050
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/internalize.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes
+; Deep Wrapper disabled
+
+; RUN: opt -attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_TUNIT_NPM_DISABLED,IS__TUNIT_____DISABLED,IS________OPM_DISABLED,IS__TUNIT_OPM_DISABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5  -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,IS__TUNIT_____DISABLED,IS________NPM_DISABLED,IS__TUNIT_NPM_DISABLED
+; RUN: opt -attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_DISABLED,NOT_TUNIT_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,IS__CGSCC_____DISABLED,IS________OPM_DISABLED,IS__CGSCC_OPM_DISABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_DISABLED,NOT_TUNIT_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,NOT_CGSCC_OPM_DISABLED,IS__CGSCC_____DISABLED,IS________NPM_DISABLED,IS__CGSCC_NPM_DISABLED
+
+; Deep Wrapper enabled
+
+; RUN: opt -attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_TUNIT_NPM_ENABLED,IS__TUNIT_____ENABLED,IS________OPM_ENABLED,IS__TUNIT_OPM_ENABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,IS__TUNIT_____ENABLED,IS________NPM_ENABLED,IS__TUNIT_NPM_ENABLED
+; RUN: opt -attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,IS__CGSCC_____ENABLED,IS________OPM_ENABLED,IS__CGSCC_OPM_ENABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_OPM_ENABLED,IS__CGSCC_____ENABLED,IS________NPM_ENABLED,IS__CGSCC_NPM_ENABLED
+; RUN: opt -attributor -attributor-cgscc -disable-inlining -attributor-allow-deep-wrappers -S < %s | FileCheck %s --check-prefix=DWRAPPER
+
+; TEST 1: This function is of linkage `linkonce`, we cannot internalize this
+;         function and use information derived from it
+;
+; DWRAPPER-NOT: Function Attrs
+; DWRAPPER-NOT: inner1.internalized
+define linkonce i32 @inner1(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner1
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; TEST 2: This function is of linkage `weak`, we cannot internalize this function and
+;         use information derived from it
+;
+; DWRAPPER-NOT: Function Attrs
+; DWRAPPER-NOT: inner2.internalized
+define weak i32 @inner2(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner2
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; TEST 3: This function is of linkage `linkonce_odr`, which can be internalized using the
+;         deep wrapper, and the IP information derived from this function can be used
+;
+; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; DWRAPPER: define private i32 @inner3.internalized(i32 %a, i32 %b)
+; DWRAPPER-NEXT: entry:
+; DWRAPPER-NEXT:   %c = add i32 %a, %b
+; DWRAPPER-NEXT:   ret i32 %c
+define linkonce_odr i32 @inner3(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner3
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; TEST 4: This function is of linkage `weak_odr`, which can be internalized using the deep
+;         wrapper
+;
+; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; DWRAPPER: define private i32 @inner4.internalized(i32 %a, i32 %b)
+; DWRAPPER-NEXT: entry:
+; DWRAPPER-NEXT:   %c = add i32 %a, %b
+; DWRAPPER-NEXT:   ret i32 %c
+define weak_odr i32 @inner4(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner4
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; TEST 5: This function has linkage `linkonce_odr` but is never called (num of use = 0), so there
+;         is no need to internalize this
+;
+; DWRAPPER-NOT: inner5.internalized
+define linkonce_odr i32 @inner5(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner5
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; Since the inner1 cannot be internalized, there should be no change to its callsite
+; Since the inner2 cannot be internalized, there should be no change to its callsite
+; Since the inner3 is internalized, the use of the original function should be replaced by the
+;  copied one
+;
+; DWRAPPER-NOT: call i32 @inner1.internalized
+; DWRAPPER: call i32 @inner1
+; DWRAPPER-NOT: call i32 @inner2.internalized
+; DWRAPPER: call i32 @inner2
+; DWRAPPER-NOT: call i32 @inner3
+; DWRAPPER: call i32 @inner3.internalized
+; DWRAPPER-NOT: call i32 @inner4
+; DWRAPPER: call i32 @inner4.internalized
+define i32 @outer1() {
+; CHECK_DISABLED-LABEL: define {{[^@]+}}@outer1()
+; CHECK_DISABLED-NEXT:  entry:
+; CHECK_DISABLED-NEXT:    [[RET1:%.*]] = call i32 @inner1(i32 1, i32 2)
+; CHECK_DISABLED-NEXT:    [[RET2:%.*]] = call i32 @inner2(i32 1, i32 2)
+; CHECK_DISABLED-NEXT:    [[RET3:%.*]] = call i32 @inner3(i32 [[RET1]], i32 [[RET2]])
+; CHECK_DISABLED-NEXT:    [[RET4:%.*]] = call i32 @inner4(i32 [[RET3]], i32 [[RET3]])
+; CHECK_DISABLED-NEXT:    ret i32 [[RET4]]
+;
+; CHECK_ENABLED-LABEL: define {{[^@]+}}@outer1()
+; CHECK_ENABLED-NEXT:  entry:
+; CHECK_ENABLED-NEXT:    [[RET1:%.*]] = call i32 @inner1(i32 1, i32 2)
+; CHECK_ENABLED-NEXT:    [[RET2:%.*]] = call i32 @inner2(i32 1, i32 2)
+; CHECK_ENABLED-NEXT:    [[RET3:%.*]] = call i32 @inner3.internalized(i32 [[RET1]], i32 [[RET2]])
+; CHECK_ENABLED-NEXT:    [[RET4:%.*]] = call i32 @inner4.internalized(i32 [[RET3]], i32 [[RET3]])
+; CHECK_ENABLED-NEXT:    ret i32 [[RET4]]
+;
+entry:
+  %ret1 = call i32 @inner1(i32 1, i32 2)
+  %ret2 = call i32 @inner2(i32 1, i32 2)
+  %ret3 = call i32 @inner3(i32 %ret1, i32 %ret2)
+  %ret4 = call i32 @inner4(i32 %ret3, i32 %ret3)
+  ret i32 %ret4
+}


        


More information about the llvm-commits mailing list