[llvm] [Linker] Propagate `nobuiltin` attributes when linking known libcalls #89431 (PR #92961)

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Tue May 21 13:19:19 PDT 2024


https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/92961

Summary:
As discussed in
https://discourse.llvm.org/t/rfc-libc-ffreestanding-fno-builtin.

LLVM ascribes special semantics to several functions that are known to
be `libcalls`. These are functions that middle-end optimizations may
transforms calls into or perform optimizations based off of known
semantics. However, these assumptions require an opaque function call to
be known valid. In situations like LTO or IR linking it is possible to
bring a libcall definition into the current module. Once this happens,
we can no longer make any guarantees about the semantics of these
functions.

We currently attempt to solve this by preventing all inlining if the
called function has `no-builtin` https://reviews.llvm.org/D74162.
However, this is overly pessimistic as it prevents all inlining even for
non-libcall functions.

This patch modifies the IRMover class to track known libcalls enabled
for the given target. If we encounter a known libcall during IR linking,
we then need to append the `nobuiltin` attribute to the destination
module. Afterwards, all new definitions we link in will be applied as
well.


>From 9bfde48dec16c1c3b585d275b44b43b9f3ae8260 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 9 May 2024 06:35:18 -0500
Subject: [PATCH] [Linker] Propagate `nobuiltin` attributes when linking known
 libcalls #89431

Summary:
As discussed in
https://discourse.llvm.org/t/rfc-libc-ffreestanding-fno-builtin.

LLVM ascribes special semantics to several functions that are known to
be `libcalls`. These are functions that middle-end optimizations may
transforms calls into or perform optimizations based off of known
semantics. However, these assumptions require an opaque function call to
be known valid. In situations like LTO or IR linking it is possible to
bring a libcall definition into the current module. Once this happens,
we can no longer make any guarantees about the semantics of these
functions.

We currently attempt to solve this by preventing all inlining if the
called function has `no-builtin` https://reviews.llvm.org/D74162.
However, this is overly pessimistic as it prevents all inlining even for
non-libcall functions.

This patch modifies the IRMover class to track known libcalls enabled
for the given target. If we encounter a known libcall during IR linking,
we then need to append the `nobuiltin` attribute to the destination
module. Afterwards, all new definitions we link in will be applied as
well.
---
 llvm/include/llvm/Linker/IRMover.h      |  26 +++++-
 llvm/lib/Linker/CMakeLists.txt          |   1 +
 llvm/lib/Linker/IRMover.cpp             | 119 +++++++++++++++++++-----
 llvm/test/Linker/Inputs/has-libcalls.ll |   5 +
 llvm/test/Linker/Inputs/strlen.ll       |  21 +++++
 llvm/test/Linker/libcalls-module.ll     |  16 ++++
 llvm/test/Linker/libcalls.ll            |  39 ++++++++
 7 files changed, 203 insertions(+), 24 deletions(-)
 create mode 100644 llvm/test/Linker/Inputs/has-libcalls.ll
 create mode 100644 llvm/test/Linker/Inputs/strlen.ll
 create mode 100644 llvm/test/Linker/libcalls-module.ll
 create mode 100644 llvm/test/Linker/libcalls.ll

diff --git a/llvm/include/llvm/Linker/IRMover.h b/llvm/include/llvm/Linker/IRMover.h
index 1e3c5394ffa2a..2f5d80d27cac3 100644
--- a/llvm/include/llvm/Linker/IRMover.h
+++ b/llvm/include/llvm/Linker/IRMover.h
@@ -12,11 +12,14 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/TargetParser/Triple.h"
 #include <functional>
 
 namespace llvm {
 class Error;
-class GlobalValue;
 class Metadata;
 class Module;
 class StructType;
@@ -60,6 +63,26 @@ class IRMover {
     bool hasType(StructType *Ty);
   };
 
+  /// Utility for handling linking of known libcall functions. If a merged
+  /// module contains a recognized library call we can no longer perform any
+  /// libcall related transformations.
+  class LibcallHandler {
+    StringSet<> Libcalls;
+    StringSet<> Triples;
+
+    BumpPtrAllocator Alloc;
+    StringSaver Saver;
+
+  public:
+    LibcallHandler() : Saver(Alloc) {}
+
+    void updateLibcalls(const Triple &TheTriple);
+
+    bool checkLibcalls(GlobalValue &GV);
+
+    bool HasLibcalls = false;
+  };
+
   IRMover(Module &M);
 
   typedef std::function<void(GlobalValue &)> ValueAdder;
@@ -84,6 +107,7 @@ class IRMover {
   Module &Composite;
   IdentifiedStructTypeSet IdentifiedStructTypes;
   MDMapT SharedMDs; ///< A Metadata map to use for all calls to \a move().
+  LibcallHandler Libcalls;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Linker/CMakeLists.txt b/llvm/lib/Linker/CMakeLists.txt
index 5afb40f8b5884..25001c09a62d2 100644
--- a/llvm/lib/Linker/CMakeLists.txt
+++ b/llvm/lib/Linker/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMLinker
   intrinsics_gen
 
   LINK_COMPONENTS
+  Analysis
   Core
   Object
   Support
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 7a5aa0c804782..64ed0125408cf 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -12,6 +12,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
@@ -87,7 +88,7 @@ class TypeMapTy : public ValueMapTypeRemapper {
 
   bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
 };
-}
+} // namespace
 
 void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
   assert(SpeculativeTypes.empty());
@@ -399,6 +400,9 @@ class IRLinker {
   /// A metadata map that's shared between IRLinker instances.
   MDMapT &SharedMDs;
 
+  /// A list of libcalls that the current target may call.
+  IRMover::LibcallHandler &Libcalls;
+
   /// Mapping of values from what they used to be in Src, to what they are now
   /// in DstM.  ValueToValueMapTy is a ValueMap, which involves some overhead
   /// due to the use of Value handles which the Linker doesn't actually need,
@@ -408,7 +412,7 @@ class IRLinker {
 
   DenseSet<GlobalValue *> ValuesToLink;
   std::vector<GlobalValue *> Worklist;
-  std::vector<std::pair<GlobalValue *, Value*>> RAUWWorklist;
+  std::vector<std::pair<GlobalValue *, Value *>> RAUWWorklist;
 
   /// Set of globals with eagerly copied metadata that may require remapping.
   /// This remapping is performed after metadata linking.
@@ -540,10 +544,12 @@ class IRLinker {
   IRLinker(Module &DstM, MDMapT &SharedMDs,
            IRMover::IdentifiedStructTypeSet &Set, std::unique_ptr<Module> SrcM,
            ArrayRef<GlobalValue *> ValuesToLink,
-           IRMover::LazyCallback AddLazyFor, bool IsPerformingImport)
+           IRMover::LibcallHandler &Libcalls, IRMover::LazyCallback AddLazyFor,
+           bool IsPerformingImport)
       : DstM(DstM), SrcM(std::move(SrcM)), AddLazyFor(std::move(AddLazyFor)),
         TypeMap(Set), GValMaterializer(*this), LValMaterializer(*this),
-        SharedMDs(SharedMDs), IsPerformingImport(IsPerformingImport),
+        SharedMDs(SharedMDs), Libcalls(Libcalls),
+        IsPerformingImport(IsPerformingImport),
         Mapper(ValueMap, RF_ReuseAndMutateDistinctMDs | RF_IgnoreMissingLocals,
                &TypeMap, &GValMaterializer),
         IndirectSymbolMCID(Mapper.registerAlternateMappingContext(
@@ -559,6 +565,13 @@ class IRLinker {
   Error run();
   Value *materialize(Value *V, bool ForIndirectSymbol);
 };
+} // namespace
+
+static void addNoBuiltinAttributes(Function &F) {
+  F.setAttributes(
+      F.getAttributes().addFnAttribute(F.getContext(), "no-builtins"));
+  F.setAttributes(
+      F.getAttributes().addFnAttribute(F.getContext(), Attribute::NoBuiltin));
 }
 
 /// The LLVM SymbolTable class autorenames globals that conflict in the symbol
@@ -632,8 +645,8 @@ Value *IRLinker::materialize(Value *V, bool ForIndirectSymbol) {
   }
 
   // If the global is being linked for an indirect symbol, it may have already
-  // been scheduled to satisfy a regular symbol. Similarly, a global being linked
-  // for a regular symbol may have already been scheduled for an indirect
+  // been scheduled to satisfy a regular symbol. Similarly, a global being
+  // linked for a regular symbol may have already been scheduled for an indirect
   // symbol. Check for these cases by looking in the other value map and
   // confirming the same value has been scheduled.  If there is an entry in the
   // ValueMap but the value is different, it means that the value already had a
@@ -755,7 +768,8 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
     NewGV->setLinkage(GlobalValue::ExternalWeakLinkage);
 
   if (auto *NewGO = dyn_cast<GlobalObject>(NewGV)) {
-    // Metadata for global variables and function declarations is copied eagerly.
+    // Metadata for global variables and function declarations is copied
+    // eagerly.
     if (isa<GlobalVariable>(SGV) || SGV->isDeclaration()) {
       NewGO->copyMetadata(cast<GlobalObject>(SGV), 0);
       if (SGV->isDeclaration() && NewGO->hasMetadata())
@@ -922,8 +936,8 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
   if (SrcGV->isDeclaration())
     return DstGV;
 
-  Type *EltTy = cast<ArrayType>(TypeMap.get(SrcGV->getValueType()))
-                    ->getElementType();
+  Type *EltTy =
+      cast<ArrayType>(TypeMap.get(SrcGV->getValueType()))->getElementType();
 
   // FIXME: This upgrade is done during linking to support the C API.  Once the
   // old form is deprecated, we should move this upgrade to
@@ -1097,7 +1111,7 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
   // assumes it is being invoked on a type in the source module.
   if (DGV && NewGV != SGV) {
     C = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-      NewGV, TypeMap.get(SGV->getType()));
+        NewGV, TypeMap.get(SGV->getType()));
   }
 
   if (DGV && NewGV != DGV) {
@@ -1471,7 +1485,6 @@ Error IRLinker::linkModuleFlagsMetadata() {
       break;
     }
     }
-
   }
 
   // For the Min behavior, set the value to 0 if either module does not have the
@@ -1605,14 +1618,29 @@ Error IRLinker::run() {
 
   DstM.setTargetTriple(SrcTriple.merge(DstTriple));
 
+  // Update the target triple's libcall information if it was changed.
+  Libcalls.updateLibcalls(Triple(DstM.getTargetTriple()));
+
   // Loop over all of the linked values to compute type mappings.
   computeTypeMapping();
 
+  bool AddsLibcalls = SrcM->getModuleFlag("llvm-libcalls") &&
+                      !DstM.getModuleFlag("llvm-libcalls");
+  if (AddsLibcalls)
+    Libcalls.HasLibcalls = true;
   std::reverse(Worklist.begin(), Worklist.end());
   while (!Worklist.empty()) {
     GlobalValue *GV = Worklist.back();
     Worklist.pop_back();
 
+    // If the module already contains libcall functions we need every function
+    // linked in to have `nobuiltin` attributes. Otherwise check if this is a
+    // libcall definition.
+    if (Function *F = dyn_cast<Function>(GV); F && Libcalls.HasLibcalls)
+      addNoBuiltinAttributes(*F);
+    else
+      AddsLibcalls = Libcalls.checkLibcalls(*GV);
+
     // Already mapped.
     if (ValueMap.find(GV) != ValueMap.end() ||
         IndirectSymbolValueMap.find(GV) != IndirectSymbolValueMap.end())
@@ -1644,20 +1672,20 @@ Error IRLinker::run() {
 
   if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) {
     // Append the module inline asm string.
-    DstM.appendModuleInlineAsm(adjustInlineAsm(SrcM->getModuleInlineAsm(),
-                                               SrcTriple));
+    DstM.appendModuleInlineAsm(
+        adjustInlineAsm(SrcM->getModuleInlineAsm(), SrcTriple));
   } else if (IsPerformingImport) {
     // Import any symver directives for symbols in DstM.
     ModuleSymbolTable::CollectAsmSymvers(*SrcM,
                                          [&](StringRef Name, StringRef Alias) {
-      if (DstM.getNamedValue(Name)) {
-        SmallString<256> S(".symver ");
-        S += Name;
-        S += ", ";
-        S += Alias;
-        DstM.appendModuleInlineAsm(S);
-      }
-    });
+                                           if (DstM.getNamedValue(Name)) {
+                                             SmallString<256> S(".symver ");
+                                             S += Name;
+                                             S += ", ";
+                                             S += Alias;
+                                             DstM.appendModuleInlineAsm(S);
+                                           }
+                                         });
   }
 
   // Reorder the globals just added to the destination module to match their
@@ -1675,6 +1703,17 @@ Error IRLinker::run() {
     }
   }
 
+  // If we have imported a recognized libcall function we can no longer make any
+  // reasonable optimizations based off of its semantics. Add the 'nobuiltin'
+  // attribute to every function to suppress libcall detection.
+  if (AddsLibcalls) {
+    Metadata *MD = DstM.getModuleFlag("llvm-libcalls");
+    if (!MD)
+      DstM.addModuleFlag(llvm::Module::Override, "llvm-libcalls", 1);
+    for (Function &F : DstM.functions())
+      addNoBuiltinAttributes(F);
+  }
+
   // Merge the module flags into the DstM module.
   return linkModuleFlagsMetadata();
 }
@@ -1757,6 +1796,29 @@ bool IRMover::IdentifiedStructTypeSet::hasType(StructType *Ty) {
   return I == NonOpaqueStructTypes.end() ? false : *I == Ty;
 }
 
+void IRMover::LibcallHandler::updateLibcalls(const Triple &TheTriple) {
+  if (Triples.count(TheTriple.getTriple()))
+    return;
+  Triples.insert(Saver.save(TheTriple.getTriple()));
+
+  // Collect the names of runtime functions that the target may want to call.
+  TargetLibraryInfoImpl TLII(TheTriple);
+  TargetLibraryInfo TLI(TLII);
+  for (unsigned I = 0, E = static_cast<unsigned>(LibFunc::NumLibFuncs); I != E;
+       ++I) {
+    LibFunc F = static_cast<LibFunc>(I);
+    if (TLI.has(F))
+      Libcalls.insert(TLI.getName(F));
+  }
+}
+
+bool IRMover::LibcallHandler::checkLibcalls(GlobalValue &GV) {
+  if (HasLibcalls)
+    return false;
+  return HasLibcalls = (isa<Function>(&GV) && !GV.isDeclaration() &&
+                        Libcalls.count(GV.getName()));
+}
+
 IRMover::IRMover(Module &M) : Composite(M) {
   TypeFinder StructTypes;
   StructTypes.run(M, /* OnlyNamed */ false);
@@ -1772,14 +1834,25 @@ IRMover::IRMover(Module &M) : Composite(M) {
   for (const auto *MD : StructTypes.getVisitedMetadata()) {
     SharedMDs[MD].reset(const_cast<MDNode *>(MD));
   }
+
+  // Check the composite module for any already present libcalls. If we define
+  // these then it is important to mark any imported functions as 'nobuiltin'.
+  Libcalls.updateLibcalls(Triple(Composite.getTargetTriple()));
+  for (Function &F : Composite.functions())
+    if (Libcalls.checkLibcalls(F))
+      break;
+
+  if (Libcalls.HasLibcalls)
+    for (Function &F : Composite.functions())
+      addNoBuiltinAttributes(F);
 }
 
 Error IRMover::move(std::unique_ptr<Module> Src,
                     ArrayRef<GlobalValue *> ValuesToLink,
                     LazyCallback AddLazyFor, bool IsPerformingImport) {
   IRLinker TheIRLinker(Composite, SharedMDs, IdentifiedStructTypes,
-                       std::move(Src), ValuesToLink, std::move(AddLazyFor),
-                       IsPerformingImport);
+                       std::move(Src), ValuesToLink, Libcalls,
+                       std::move(AddLazyFor), IsPerformingImport);
   Error E = TheIRLinker.run();
   Composite.dropTriviallyDeadConstantArrays();
   return E;
diff --git a/llvm/test/Linker/Inputs/has-libcalls.ll b/llvm/test/Linker/Inputs/has-libcalls.ll
new file mode 100644
index 0000000000000..6a0990cd4b4d0
--- /dev/null
+++ b/llvm/test/Linker/Inputs/has-libcalls.ll
@@ -0,0 +1,5 @@
+target triple = "x86_64-unknown-linux-gnu"
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 4, !"llvm-libcalls", i32 1}
diff --git a/llvm/test/Linker/Inputs/strlen.ll b/llvm/test/Linker/Inputs/strlen.ll
new file mode 100644
index 0000000000000..bc54aaf41e0ce
--- /dev/null
+++ b/llvm/test/Linker/Inputs/strlen.ll
@@ -0,0 +1,21 @@
+target triple = "x86_64-unknown-linux-gnu"
+
+define i64 @strlen(ptr %s) #0 {
+entry:
+  br label %for.cond
+
+for.cond:
+  %s.addr.0 = phi ptr [ %s, %entry ], [ %incdec.ptr, %for.cond ]
+  %0 = load i8, ptr %s.addr.0, align 1
+  %tobool.not = icmp eq i8 %0, 0
+  %incdec.ptr = getelementptr inbounds i8, ptr %s.addr.0, i64 1
+  br i1 %tobool.not, label %for.end, label %for.cond
+
+for.end:
+  %sub.ptr.lhs.cast = ptrtoint ptr %s.addr.0 to i64
+  %sub.ptr.rhs.cast = ptrtoint ptr %s to i64
+  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  ret i64 %sub.ptr.sub
+}
+
+attributes #0 = { noinline }
diff --git a/llvm/test/Linker/libcalls-module.ll b/llvm/test/Linker/libcalls-module.ll
new file mode 100644
index 0000000000000..913f225768748
--- /dev/null
+++ b/llvm/test/Linker/libcalls-module.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-link %s %S/Inputs/has-libcalls.ll -S -o - 2>%t.a.err | FileCheck %s
+; RUN: llvm-link %S/Inputs/has-libcalls.ll %s -S -o - 2>%t.a.err | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: define void @foo() #[[ATTR0:[0-9]+]]
+define void @foo() #0 {
+  ret void
+}
+
+attributes #0 = { noinline }
+
+; CHECK: attributes #[[ATTR0]] = { nobuiltin noinline "no-builtins" }
+
+; CHECK: !llvm.module.flags = !{!0}
+; CHECK: !0 = !{i32 4, !"llvm-libcalls", i32 1}
diff --git a/llvm/test/Linker/libcalls.ll b/llvm/test/Linker/libcalls.ll
new file mode 100644
index 0000000000000..ddc0d35e91d9e
--- /dev/null
+++ b/llvm/test/Linker/libcalls.ll
@@ -0,0 +1,39 @@
+; RUN: llvm-link %s %S/Inputs/strlen.ll -S -o - 2>%t.a.err | FileCheck %s --check-prefix=CHECK1
+; RUN: llvm-link %S/Inputs/strlen.ll %s -S -o - 2>%t.a.err | FileCheck %s --check-prefix=CHECK2
+
+target triple = "x86_64-unknown-linux-gnu"
+
+ at .str = private unnamed_addr constant [7 x i8] c"string\00", align 1
+ at str = dso_local global ptr @.str, align 8
+
+define void @foo() #0 {
+  ret void
+}
+
+declare i64 @strlen(ptr)
+
+define void @bar() #0 {
+  ret void
+}
+
+define i64 @baz() #0 {
+entry:
+  %0 = load ptr, ptr @str, align 8
+  %call = call i64 @strlen(ptr noundef %0)
+  ret i64 %call
+}
+
+attributes #0 = { noinline }
+
+; CHECK1: define void @foo() #[[ATTR0:[0-9]+]]
+; CHECK1: define void @bar() #[[ATTR0:[0-9]+]]
+; CHECK1: define i64 @baz() #[[ATTR0:[0-9]+]]
+; CHECK1: define i64 @strlen(ptr [[S:%.*]]) #[[ATTR0]]
+
+; CHECK2: define i64 @strlen(ptr [[S:%.*]]) #[[ATTR0:[0-9]+]]
+; CHECK2: define void @foo() #[[ATTR0:[0-9]+]]
+; CHECK2: define void @bar() #[[ATTR0:[0-9]+]]
+; CHECK2: define i64 @baz() #[[ATTR0]]
+
+; CHECK1: attributes #[[ATTR0]] = { nobuiltin noinline "no-builtins" }
+; CHECK2: attributes #[[ATTR0]] = { nobuiltin noinline "no-builtins" }



More information about the llvm-commits mailing list