[llvm] adbaa39 - [Attributor] Change function internalization to not replace uses in internalized callers

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 28 15:57:40 PDT 2021


Author: Joseph Huber
Date: 2021-07-28T18:57:28-04:00
New Revision: adbaa39dfce7a8361d89b6a3b382fd8f50b94727

URL: https://github.com/llvm/llvm-project/commit/adbaa39dfce7a8361d89b6a3b382fd8f50b94727
DIFF: https://github.com/llvm/llvm-project/commit/adbaa39dfce7a8361d89b6a3b382fd8f50b94727.diff

LOG: [Attributor] Change function internalization to not replace uses in internalized callers

The current implementation of function internalization creates a copy of each
function and replaces every use. This has the downside that the external
versions of the functions will call into the internalized versions of the
functions. This prevents them from being fully independent of each other. This
patch replaces the current internalization scheme with a method that creates
all the copies of the functions intended to be internalized first and then
replaces the uses as long as their caller is not already internalized.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D106931

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/IPO/Attributor.h
    llvm/lib/Transforms/IPO/Attributor.cpp
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/OpenMP/custom_state_machines.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index c93b8adcc8901..c3c12fd237463 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1855,6 +1855,10 @@ struct Attributor {
   ///
   static void createShallowWrapper(Function &F);
 
+  /// Returns true if the function \p F can be internalized. i.e. it has a
+  /// compatible linkage.
+  static bool isInternalizable(Function &F);
+
   /// Make another copy of the function \p F such that the copied version has
   /// internal linkage afterwards and can be analysed. Then we replace all uses
   /// of the original function to the copied one
@@ -1870,6 +1874,22 @@ struct Attributor {
   /// null pointer.
   static Function *internalizeFunction(Function &F, bool Force = false);
 
+  /// Make copies of each function in the set \p FnSet such that the copied
+  /// version has internal linkage afterwards and can be analysed. Then we
+  /// replace all uses of the original function to the copied one. The map
+  /// \p FnMap contains a mapping of functions to their internalized versions.
+  ///
+  /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
+  /// linkage can be internalized because these linkages guarantee that other
+  /// definitions with the same name have the same semantics as this one.
+  ///
+  /// This version will internalize all the functions in the set \p FnSet at
+  /// once and then replace the uses. This prevents internalized functions being
+  /// called by external functions when there is an internalized version in the
+  /// module.
+  static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
+                                   DenseMap<Function *, Function *> &FnMap);
+
   /// Return the data layout associated with the anchor scope.
   const DataLayout &getDataLayout() const { return InfoCache.DL; }
 

diff  --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 762317425026b..5ccd001712eb8 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1925,49 +1925,85 @@ void Attributor::createShallowWrapper(Function &F) {
   NumFnShallowWrappersCreated++;
 }
 
+bool Attributor::isInternalizable(Function &F) {
+  if (F.isDeclaration() || F.hasLocalLinkage() ||
+      GlobalValue::isInterposableLinkage(F.getLinkage()))
+    return false;
+  return true;
+}
+
 Function *Attributor::internalizeFunction(Function &F, bool Force) {
   if (!AllowDeepWrapper && !Force)
     return nullptr;
-  if (F.isDeclaration() || F.hasLocalLinkage() ||
-      GlobalValue::isInterposableLinkage(F.getLinkage()))
+  if (!isInternalizable(F))
     return nullptr;
 
-  Module &M = *F.getParent();
-  FunctionType *FnTy = F.getFunctionType();
-
-  // create a copy of the current function
-  Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
-                                      F.getName() + ".internalized");
-  ValueToValueMapTy VMap;
-  auto *NewFArgIt = Copied->arg_begin();
-  for (auto &Arg : F.args()) {
-    auto ArgName = Arg.getName();
-    NewFArgIt->setName(ArgName);
-    VMap[&Arg] = &(*NewFArgIt++);
-  }
-  SmallVector<ReturnInst *, 8> Returns;
+  SmallPtrSet<Function *, 2> FnSet = {&F};
+  DenseMap<Function *, Function *> InternalizedFns;
+  internalizeFunctions(FnSet, InternalizedFns);
 
-  // Copy the body of the original function to the new one
-  CloneFunctionInto(Copied, &F, VMap, CloneFunctionChangeType::LocalChangesOnly,
-                    Returns);
-
-  // Set the linakage and visibility late as CloneFunctionInto has some implicit
-  // requirements.
-  Copied->setVisibility(GlobalValue::DefaultVisibility);
-  Copied->setLinkage(GlobalValue::PrivateLinkage);
+  return InternalizedFns[&F];
+}
 
-  // Copy metadata
-  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
-  F.getAllMetadata(MDs);
-  for (auto MDIt : MDs)
-    if (!Copied->hasMetadata())
-      Copied->addMetadata(MDIt.first, *MDIt.second);
+bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
+                                      DenseMap<Function *, Function *> &FnMap) {
+  for (Function *F : FnSet)
+    if (!Attributor::isInternalizable(*F))
+      return false;
 
-  M.getFunctionList().insert(F.getIterator(), Copied);
-  F.replaceAllUsesWith(Copied);
-  Copied->setDSOLocal(true);
+  FnMap.clear();
+  // Generate the internalized version of each function.
+  for (Function *F : FnSet) {
+    Module &M = *F->getParent();
+    FunctionType *FnTy = F->getFunctionType();
+
+    // Create a copy of the current function
+    Function *Copied =
+        Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(),
+                         F->getName() + ".internalized");
+    ValueToValueMapTy VMap;
+    auto *NewFArgIt = Copied->arg_begin();
+    for (auto &Arg : F->args()) {
+      auto ArgName = Arg.getName();
+      NewFArgIt->setName(ArgName);
+      VMap[&Arg] = &(*NewFArgIt++);
+    }
+    SmallVector<ReturnInst *, 8> Returns;
+
+    // Copy the body of the original function to the new one
+    CloneFunctionInto(Copied, F, VMap,
+                      CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+    // Set the linakage and visibility late as CloneFunctionInto has some
+    // implicit requirements.
+    Copied->setVisibility(GlobalValue::DefaultVisibility);
+    Copied->setLinkage(GlobalValue::PrivateLinkage);
+
+    // Copy metadata
+    SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+    F->getAllMetadata(MDs);
+    for (auto MDIt : MDs)
+      if (!Copied->hasMetadata())
+        Copied->addMetadata(MDIt.first, *MDIt.second);
+
+    M.getFunctionList().insert(F->getIterator(), Copied);
+    Copied->setDSOLocal(true);
+    FnMap[F] = Copied;
+  }
+
+  // Replace all uses of the old function with the new internalized function
+  // unless the caller is a function that was just internalized.
+  for (Function *F : FnSet) {
+    auto &InternalizedFn = FnMap[F];
+    auto IsNotInternalized = [&](Use &U) -> bool {
+      if (auto *CB = dyn_cast<CallBase>(U.getUser()))
+        return !FnMap.lookup(CB->getCaller());
+      return false;
+    };
+    F->replaceUsesWithIf(InternalizedFn, IsNotInternalized);
+  }
 
-  return Copied;
+  return true;
 }
 
 bool Attributor::isValidFunctionSignatureRewrite(

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index b803493527199..d6b97915ede65 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -4176,28 +4176,32 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
     ORE.emit([&]() {
       OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
       return ORA << "Could not internalize function. "
-                 << "Some optimizations may not be possible.";
+                 << "Some optimizations may not be possible. [OMP140]";
     });
   };
 
   // Create internal copies of each function if this is a kernel Module. This
   // allows iterprocedural passes to see every call edge.
-  DenseSet<const Function *> InternalizedFuncs;
-  if (isOpenMPDevice(M))
+  DenseMap<Function *, Function *> InternalizedMap;
+  if (isOpenMPDevice(M)) {
+    SmallPtrSet<Function *, 16> InternalizeFns;
     for (Function &F : M)
       if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
           !DisableInternalization) {
-        if (Attributor::internalizeFunction(F, /* Force */ true)) {
-          InternalizedFuncs.insert(&F);
+        if (Attributor::isInternalizable(F)) {
+          InternalizeFns.insert(&F);
         } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
           EmitRemark(F);
         }
       }
 
+    Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
+  }
+
   // Look at every function in the Module unless it was internalized.
   SmallVector<Function *, 16> SCC;
   for (Function &F : M)
-    if (!F.isDeclaration() && !InternalizedFuncs.contains(&F))
+    if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
       SCC.push_back(&F);
 
   if (SCC.empty())

diff  --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index 28987f6ee2b02..a9e78935bbac6 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -1664,15 +1664,15 @@ attributes #10 = { convergent nounwind readonly willreturn }
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    br label [[RETURN:%.*]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[A]], 1
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]]
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR8]]
 ; CHECK-NEXT:    br label [[RETURN]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret void


        


More information about the llvm-commits mailing list