[llvm] [Linker] Propagate `nobuiltin` attributes when linking known libcalls (PR #89431)

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 19 11:17:30 PDT 2024


https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/89431

Summary:
As discussed in https://discourse.llvm.org/t/rfc-libc-ffreestanding-fno-builtin.

LLVM ascribes special semantics to several functions that are known to
be `libcalls`. These are functions that middle-end optimizations may
transforms calls into or perform optimizations based off of known
semantics. However, these assumptions require an opaque function call to
be known valid. In situations like LTO or IR linking it is possible to
bring a libcall definition into the current module. Once this happens,
we can no long make any gurantees about the semantics of these
functions.

We currently attempt to solve this by preventing all inlining if the
called function has `no-builtin` https://reviews.llvm.org/D74162.
However, this is overly pessimistic as it prevents all inlining even for
non-libcall functions.

This patch modifies the IRMover class to track known libcalls enabled
for the given target. If we encounter a known libcall during IR linking,
we then need to append the `nobuiltin` attribute to the destination
module. Afterwards, all new definitions we link in will be applied as
well.


>From eaef5f5e2eeef549f4c4450be198878f6023177e Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 19 Apr 2024 13:11:50 -0500
Subject: [PATCH] [Linker] Propagate `nobuiltin` attributes when linking known
 libcalls

Summary:
As discussed in https://discourse.llvm.org/t/rfc-libc-ffreestanding-fno-builtin.

LLVM ascribes special semantics to several functions that are known to
be `libcalls`. These are functions that middle-end optimizations may
transforms calls into or perform optimizations based off of known
semantics. However, these assumptions require an opaque function call to
be known valid. In situations like LTO or IR linking it is possible to
bring a libcall definition into the current module. Once this happens,
we can no long make any gurantees about the semantics of these
functions.

We currently attempt to solve this by preventing all inlining if the
called function has `no-builtin` https://reviews.llvm.org/D74162.
However, this is overly pessimistic as it prevents all inlining even for
non-libcall functions.

This patch modifies the IRMover class to track known libcalls enabled
for the given target. If we encounter a known libcall during IR linking,
we then need to append the `nobuiltin` attribute to the destination
module. Afterwards, all new definitions we link in will be applied as
well.
---
 llvm/include/llvm/Linker/IRMover.h | 34 ++++++++++++++++++++-
 llvm/lib/Linker/CMakeLists.txt     |  1 +
 llvm/lib/Linker/IRMover.cpp        | 49 ++++++++++++++++++++++++++++--
 llvm/test/Linker/Inputs/strlen.ll  | 19 ++++++++++++
 llvm/test/Linker/libcalls.ll       | 25 +++++++++++++++
 5 files changed, 124 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Linker/Inputs/strlen.ll
 create mode 100644 llvm/test/Linker/libcalls.ll

diff --git a/llvm/include/llvm/Linker/IRMover.h b/llvm/include/llvm/Linker/IRMover.h
index 1e3c5394ffa2af..57c6fd2488f14a 100644
--- a/llvm/include/llvm/Linker/IRMover.h
+++ b/llvm/include/llvm/Linker/IRMover.h
@@ -12,11 +12,14 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/TargetParser/Triple.h"
 #include <functional>
 
 namespace llvm {
 class Error;
-class GlobalValue;
 class Metadata;
 class Module;
 class StructType;
@@ -60,6 +63,34 @@ class IRMover {
     bool hasType(StructType *Ty);
   };
 
+  /// Utility for handling linking of known libcall functions. If a merged
+  /// module contains a recognized library call we can no longer perform any
+  /// libcall related transformations.
+  class LibcallHandler {
+    bool HasLibcalls = false;
+
+    StringSet<> Libcalls;
+    StringSet<> Triples;
+
+    BumpPtrAllocator Alloc;
+    StringSaver Saver;
+
+  public:
+    LibcallHandler() : Saver(Alloc) {}
+
+    void updateLibcalls(llvm::Triple Triple);
+
+    bool checkLibcalls(llvm::GlobalValue *GV) {
+      if (HasLibcalls)
+        return false;
+      HasLibcalls = GV && isa<Function>(GV) && !GV->isDeclaration() &&
+                    Libcalls.count(GV->getName());
+      return HasLibcalls;
+    }
+
+    bool hasLibcalls() const { return HasLibcalls; }
+  };
+
   IRMover(Module &M);
 
   typedef std::function<void(GlobalValue &)> ValueAdder;
@@ -84,6 +115,7 @@ class IRMover {
   Module &Composite;
   IdentifiedStructTypeSet IdentifiedStructTypes;
   MDMapT SharedMDs; ///< A Metadata map to use for all calls to \a move().
+  LibcallHandler Libcalls;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Linker/CMakeLists.txt b/llvm/lib/Linker/CMakeLists.txt
index 5afb40f8b58842..25001c09a62d25 100644
--- a/llvm/lib/Linker/CMakeLists.txt
+++ b/llvm/lib/Linker/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMLinker
   intrinsics_gen
 
   LINK_COMPONENTS
+  Analysis
   Core
   Object
   Support
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 7a5aa0c8047821..d60d6d1feb153b 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -12,6 +12,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
@@ -399,6 +400,9 @@ class IRLinker {
   /// A metadata map that's shared between IRLinker instances.
   MDMapT &SharedMDs;
 
+  /// A list of libcalls that the current target may call.
+  IRMover::LibcallHandler &Libcalls;
+
   /// Mapping of values from what they used to be in Src, to what they are now
   /// in DstM.  ValueToValueMapTy is a ValueMap, which involves some overhead
   /// due to the use of Value handles which the Linker doesn't actually need,
@@ -540,10 +544,12 @@ class IRLinker {
   IRLinker(Module &DstM, MDMapT &SharedMDs,
            IRMover::IdentifiedStructTypeSet &Set, std::unique_ptr<Module> SrcM,
            ArrayRef<GlobalValue *> ValuesToLink,
+           llvm::IRMover::LibcallHandler &Libcalls,
            IRMover::LazyCallback AddLazyFor, bool IsPerformingImport)
       : DstM(DstM), SrcM(std::move(SrcM)), AddLazyFor(std::move(AddLazyFor)),
         TypeMap(Set), GValMaterializer(*this), LValMaterializer(*this),
-        SharedMDs(SharedMDs), IsPerformingImport(IsPerformingImport),
+        SharedMDs(SharedMDs), Libcalls(Libcalls),
+        IsPerformingImport(IsPerformingImport),
         Mapper(ValueMap, RF_ReuseAndMutateDistinctMDs | RF_IgnoreMissingLocals,
                &TypeMap, &GValMaterializer),
         IndirectSymbolMCID(Mapper.registerAlternateMappingContext(
@@ -1605,14 +1611,27 @@ Error IRLinker::run() {
 
   DstM.setTargetTriple(SrcTriple.merge(DstTriple));
 
+  // Update the target triple's libcall information if it was changed.
+  Libcalls.updateLibcalls(Triple(DstM.getTargetTriple()));
+
   // Loop over all of the linked values to compute type mappings.
   computeTypeMapping();
 
+  bool AddsLibcalls;
   std::reverse(Worklist.begin(), Worklist.end());
   while (!Worklist.empty()) {
     GlobalValue *GV = Worklist.back();
     Worklist.pop_back();
 
+    // If the module already contains libcall functions we need every function
+    // linked in to have `nobuiltin` attributes. Otherwise check if this is a
+    // libcall definition.
+    if (Function *F = dyn_cast<Function>(GV); F && Libcalls.hasLibcalls()) {
+      F->setAttributes(F->getAttributes().addFnAttribute(
+          F->getParent()->getContext(), Attribute::NoBuiltin));
+    } else
+      AddsLibcalls = Libcalls.checkLibcalls(GV);
+
     // Already mapped.
     if (ValueMap.find(GV) != ValueMap.end() ||
         IndirectSymbolValueMap.find(GV) != IndirectSymbolValueMap.end())
@@ -1675,6 +1694,14 @@ Error IRLinker::run() {
     }
   }
 
+  // If we have imported a recognized libcall function we can no longer make any
+  // reasonable optimizations based off of its semantics. Add the 'nobuiltin'
+  // attribute to every function to suppress libcall detection.
+  if (AddsLibcalls)
+    for (Function &F : DstM.functions())
+      F.setAttributes(F.getAttributes().addFnAttribute(DstM.getContext(),
+                                                       Attribute::NoBuiltin));
+
   // Merge the module flags into the DstM module.
   return linkModuleFlagsMetadata();
 }
@@ -1757,6 +1784,22 @@ bool IRMover::IdentifiedStructTypeSet::hasType(StructType *Ty) {
   return I == NonOpaqueStructTypes.end() ? false : *I == Ty;
 }
 
+void IRMover::LibcallHandler::updateLibcalls(llvm::Triple Triple) {
+  if (Triples.count(Triple.getTriple()))
+    return;
+  Triples.insert(Saver.save(Triple.getTriple()));
+
+  // Collect the names of runtime functions that the target may want to call.
+  TargetLibraryInfoImpl TLII(Triple);
+  TargetLibraryInfo TLI(TLII);
+  for (unsigned I = 0, E = static_cast<unsigned>(LibFunc::NumLibFuncs); I != E;
+       ++I) {
+    LibFunc F = static_cast<LibFunc>(I);
+    if (TLI.has(F))
+      Libcalls.insert(TLI.getName(F));
+  }
+}
+
 IRMover::IRMover(Module &M) : Composite(M) {
   TypeFinder StructTypes;
   StructTypes.run(M, /* OnlyNamed */ false);
@@ -1778,8 +1821,8 @@ Error IRMover::move(std::unique_ptr<Module> Src,
                     ArrayRef<GlobalValue *> ValuesToLink,
                     LazyCallback AddLazyFor, bool IsPerformingImport) {
   IRLinker TheIRLinker(Composite, SharedMDs, IdentifiedStructTypes,
-                       std::move(Src), ValuesToLink, std::move(AddLazyFor),
-                       IsPerformingImport);
+                       std::move(Src), ValuesToLink, Libcalls,
+                       std::move(AddLazyFor), IsPerformingImport);
   Error E = TheIRLinker.run();
   Composite.dropTriviallyDeadConstantArrays();
   return E;
diff --git a/llvm/test/Linker/Inputs/strlen.ll b/llvm/test/Linker/Inputs/strlen.ll
new file mode 100644
index 00000000000000..9d8b2cd9f0e2a5
--- /dev/null
+++ b/llvm/test/Linker/Inputs/strlen.ll
@@ -0,0 +1,19 @@
+target triple = "x86_64-unknown-linux-gnu"
+
+define i64 @strlen(ptr %s) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %s.addr.0 = phi ptr [ %s, %entry ], [ %incdec.ptr, %for.cond ]
+  %0 = load i8, ptr %s.addr.0, align 1
+  %tobool.not = icmp eq i8 %0, 0
+  %incdec.ptr = getelementptr inbounds i8, ptr %s.addr.0, i64 1
+  br i1 %tobool.not, label %for.end, label %for.cond
+
+for.end:
+  %sub.ptr.lhs.cast = ptrtoint ptr %s.addr.0 to i64
+  %sub.ptr.rhs.cast = ptrtoint ptr %s to i64
+  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  ret i64 %sub.ptr.sub
+}
diff --git a/llvm/test/Linker/libcalls.ll b/llvm/test/Linker/libcalls.ll
new file mode 100644
index 00000000000000..a6c560585cbe4f
--- /dev/null
+++ b/llvm/test/Linker/libcalls.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-link %s %S/Inputs/strlen.ll -S -o - 2>%t.a.err | FileCheck %s --check-prefix=CHECK1
+; RUN: llvm-link %S/Inputs/strlen.ll %s -S -o - 2>%t.a.err | FileCheck %s --check-prefix=CHECK2
+
+target triple = "x86_64-unknown-linux-gnu"
+
+ at .str = private unnamed_addr constant [7 x i8] c"string\00", align 1
+ at str = dso_local global ptr @.str, align 8
+
+declare i64 @strlen(ptr)
+
+define i64 @foo() {
+entry:
+  %0 = load ptr, ptr @str, align 8
+  %call = call i64 @strlen(ptr noundef %0)
+  ret i64 %call
+}
+
+; CHECK1: define i64 @foo() #[[ATTR0:[0-9]+]]
+; CHECK1: define i64 @strlen(ptr [[S:%.*]]) #[[ATTR0]]
+
+; CHECK2: define i64 @strlen(ptr [[S:%.*]]) #[[ATTR0:[0-9]+]]
+; CHECK2: define i64 @foo() #[[ATTR0]]
+
+; CHECK1: attributes #[[ATTR0]] = { nobuiltin }
+; CHECK2: attributes #[[ATTR0]] = { nobuiltin }



More information about the llvm-commits mailing list