[llvm] [LTO] Drop the weak function if there is a non-weak global symbol (PR #76287)

via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 23 19:15:30 PST 2023


https://github.com/DianQK updated https://github.com/llvm/llvm-project/pull/76287

>From 95a3dcec655a719071279b73a96611e6af2b8cfe Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 23 Dec 2023 21:15:12 +0800
Subject: [PATCH 1/3] Pre-commit test cases

---
 llvm/test/LTO/X86/inline-asm-lto-weak.ll | 80 ++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 llvm/test/LTO/X86/inline-asm-lto-weak.ll

diff --git a/llvm/test/LTO/X86/inline-asm-lto-weak.ll b/llvm/test/LTO/X86/inline-asm-lto-weak.ll
new file mode 100644
index 00000000000000..8b29929c3e5b91
--- /dev/null
+++ b/llvm/test/LTO/X86/inline-asm-lto-weak.ll
@@ -0,0 +1,80 @@
+; RUN: split-file %s %t
+; RUN: opt %t/asm_global.ll -o %t/asm_global.bc
+; RUN: opt %t/asm_weak.ll -o %t/asm_weak.bc
+; RUN: opt %t/fn_global.ll -o %t/fn_global.bc
+; RUN: opt %t/fn_weak.ll -o %t/fn_weak.bc
+
+; RUN: not llvm-lto %t/asm_global.bc %t/fn_global.bc -o %to1.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llvm-lto %t/fn_global.bc %t/asm_global.bc -o %to1.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+
+; FIXME: We want to drop the weak function.
+; RUN: not llvm-lto %t/asm_global.bc %t/fn_weak.bc -o %to2.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llvm-lto %t/fn_weak.bc %t/asm_global.bc -o %to2.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+
+; FIXME: We want to drop the weak asm symbol.
+; RUN: not llvm-lto %t/asm_global.bc %t/asm_weak.bc -o %to3.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llvm-lto %t/asm_weak.bc %t/asm_global.bc -o %to3.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+
+; FIXME: We want to drop the weak asm symbol.
+; RUN: not llvm-lto %t/asm_weak.bc %t/fn_global.bc -o %to4.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR2
+; RUN: not llvm-lto %t/fn_global.bc %t/asm_weak.bc -o %to4.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR2
+
+; RUN: not llvm-lto %t/asm_weak.bc %t/fn_weak.bc -o %to5.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llvm-lto %t/fn_weak.bc %t/asm_weak.bc -o %to5.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+
+; Drop the weak function.
+; RUN: llvm-lto %t/fn_global.bc %t/fn_weak.bc -o %to6.o --save-linked-module --exported-symbol=foo
+; RUN: llvm-dis < %to6.o.linked.bc | FileCheck %s --check-prefix=FN_GLOBAL
+; RUN: llvm-lto %t/fn_weak.bc %t/fn_global.bc -o %to6.o --save-linked-module --exported-symbol=foo
+; RUN: llvm-dis < %to6.o.linked.bc | FileCheck %s --check-prefix=FN_GLOBAL
+
+; ERR: error: <unknown>:0: symbol 'foo' is already defined
+; ERR2: error: <unknown>:0: foo changed binding to STB_GLOBAL
+
+; FN_GLOBAL: define i32 @foo(i32 %i)
+
+;--- asm_global.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".global foo"
+module asm "foo:"
+module asm "leal	1(%rdi), %eax"
+module asm "retq"
+
+;--- asm_weak.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".weak foo"
+module asm "foo:"
+module asm "leal	2(%rdi), %eax"
+module asm "retq"
+
+;--- fn_global.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i32 %i) {
+  %r = add i32 %i, 3
+  ret i32 %r
+}
+
+define internal i32 @bar(i32 %i) {
+  %r = call i32 @foo(i32 %i)
+  ret i32 %r
+}
+
+;--- fn_weak.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define weak i32 @foo(i32 %i) {
+  %r = add i32 %i, 4
+  ret i32 %r
+}
+
+define internal i32 @bar(i32 %i) {
+  %r = call i32 @foo(i32 %i)
+  ret i32 %r
+}

>From 8b22063cbac985a99cfd85422c981182d73f6b7c Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 23 Dec 2023 21:40:48 +0800
Subject: [PATCH 2/3] [LTO] Drop the weak function if there is a non-weak
 global symbol

---
 llvm/include/llvm/Object/ModuleSymbolTable.h |  1 +
 llvm/lib/Linker/LinkModules.cpp              | 94 +++++++++++++++-----
 llvm/lib/Object/ModuleSymbolTable.cpp        |  7 ++
 llvm/test/LTO/X86/inline-asm-lto-weak.ll     | 10 ++-
 4 files changed, 85 insertions(+), 27 deletions(-)

diff --git a/llvm/include/llvm/Object/ModuleSymbolTable.h b/llvm/include/llvm/Object/ModuleSymbolTable.h
index 1134b98c2247e2..3f2834c2ff56d1 100644
--- a/llvm/include/llvm/Object/ModuleSymbolTable.h
+++ b/llvm/include/llvm/Object/ModuleSymbolTable.h
@@ -48,6 +48,7 @@ class ModuleSymbolTable {
 
   void printSymbolName(raw_ostream &OS, Symbol S) const;
   uint32_t getSymbolFlags(Symbol S) const;
+  static bool canParseInlineASM(const Module &M);
 
   /// Parse inline ASM and collect the symbols that are defined or referenced in
   /// the current module.
diff --git a/llvm/lib/Linker/LinkModules.cpp b/llvm/lib/Linker/LinkModules.cpp
index 4fe1f1a0f51833..11d7f097ea6200 100644
--- a/llvm/lib/Linker/LinkModules.cpp
+++ b/llvm/lib/Linker/LinkModules.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Linker/Linker.h"
+#include "llvm/Object/ModuleSymbolTable.h"
 #include "llvm/Support/Error.h"
 using namespace llvm;
 
@@ -32,6 +33,7 @@ class ModuleLinker {
   std::unique_ptr<Module> SrcM;
 
   SetVector<GlobalValue *> ValuesToLink;
+  StringSet<> DstGlobalAsmSymbols;
 
   /// For symbol clashes, prefer those from Src.
   unsigned Flags;
@@ -79,14 +81,17 @@ class ModuleLinker {
   /// Given a global in the source module, return the global in the
   /// destination module that is being linked to, if any.
   GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
-    Module &DstM = Mover.getModule();
     // If the source has no name it can't link.  If it has local linkage,
     // there is no name match-up going on.
     if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
       return nullptr;
+    return getLinkedToGlobal(SrcGV->getName());
+  }
 
+  GlobalValue *getLinkedToGlobal(StringRef Name) {
+    Module &DstM = Mover.getModule();
     // Otherwise see if we have a match in the destination module's symtab.
-    GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
+    GlobalValue *DGV = DstM.getNamedValue(Name);
     if (!DGV)
       return nullptr;
 
@@ -106,6 +111,9 @@ class ModuleLinker {
 
   bool linkIfNeeded(GlobalValue &GV, SmallVectorImpl<GlobalValue *> &GVToClone);
 
+  // Drop GV if it is a weak linkage and the asm symbol is not weak.
+  void dropWeakGVForAsm();
+
 public:
   ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
                std::function<void(Module &, const StringSet<> &)>
@@ -128,6 +136,29 @@ getMinVisibility(GlobalValue::VisibilityTypes A,
   return GlobalValue::DefaultVisibility;
 }
 
+static void changeDefToDecl(GlobalValue &GV) {
+  if (auto *F = dyn_cast<Function>(&GV)) {
+    F->deleteBody();
+  } else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
+    Var->setInitializer(nullptr);
+  } else {
+    auto &Alias = cast<GlobalAlias>(GV);
+    Module &M = *Alias.getParent();
+    GlobalValue *Declaration;
+    if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
+      Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
+    } else {
+      Declaration =
+          new GlobalVariable(M, Alias.getValueType(), /*isConstant*/ false,
+                             GlobalValue::ExternalLinkage,
+                             /*Initializer*/ nullptr);
+    }
+    Declaration->takeName(&Alias);
+    Alias.replaceAllUsesWith(Declaration);
+    Alias.eraseFromParent();
+  }
+}
+
 bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
                                    const GlobalVariable *&GVar) {
   const GlobalValue *GVal = M.getNamedValue(ComdatName);
@@ -325,6 +356,34 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
                    "': symbol multiply defined!");
 }
 
+void ModuleLinker::dropWeakGVForAsm() {
+  Module &DstM = Mover.getModule();
+  if (!ModuleSymbolTable::canParseInlineASM(DstM))
+    return;
+  ModuleSymbolTable::CollectAsmSymbols(
+      DstM, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+        if (Flags & object::BasicSymbolRef::SF_Global &&
+            !(Flags & object::BasicSymbolRef::SF_Weak))
+          DstGlobalAsmSymbols.insert(Name);
+      });
+
+  if (!ModuleSymbolTable::canParseInlineASM(*SrcM))
+    return;
+  ModuleSymbolTable::CollectAsmSymbols(
+      *SrcM, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+        if (Flags & object::BasicSymbolRef::SF_Global &&
+            !(Flags & object::BasicSymbolRef::SF_Weak)) {
+          auto *DstGV = getLinkedToGlobal(Name);
+          if (DstGV && DstGV->hasWeakLinkage()) {
+            if (DstGV->use_empty())
+              DstGV->eraseFromParent();
+            else
+              changeDefToDecl(*DstGV);
+          }
+        }
+      });
+}
+
 bool ModuleLinker::linkIfNeeded(GlobalValue &GV,
                                 SmallVectorImpl<GlobalValue *> &GVToClone) {
   GlobalValue *DGV = getLinkedToGlobal(&GV);
@@ -394,8 +453,14 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV,
     return true;
   if (DGV && ComdatFrom == LinkFrom::Both)
     GVToClone.push_back(LinkFromSrc ? DGV : &GV);
-  if (LinkFromSrc)
+  if (!DGV && GV.hasWeakLinkage() &&
+      DstGlobalAsmSymbols.contains(GV.getName())) {
+    changeDefToDecl(GV);
+    LinkFromSrc = false;
+  }
+  if (LinkFromSrc) {
     ValuesToLink.insert(&GV);
+  }
   return false;
 }
 
@@ -436,27 +501,7 @@ void ModuleLinker::dropReplacedComdat(
     GV.eraseFromParent();
     return;
   }
-
-  if (auto *F = dyn_cast<Function>(&GV)) {
-    F->deleteBody();
-  } else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) {
-    Var->setInitializer(nullptr);
-  } else {
-    auto &Alias = cast<GlobalAlias>(GV);
-    Module &M = *Alias.getParent();
-    GlobalValue *Declaration;
-    if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) {
-      Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M);
-    } else {
-      Declaration =
-          new GlobalVariable(M, Alias.getValueType(), /*isConstant*/ false,
-                             GlobalValue::ExternalLinkage,
-                             /*Initializer*/ nullptr);
-    }
-    Declaration->takeName(&Alias);
-    Alias.replaceAllUsesWith(Declaration);
-    Alias.eraseFromParent();
-  }
+  changeDefToDecl(GV);
 }
 
 bool ModuleLinker::run() {
@@ -533,6 +578,7 @@ bool ModuleLinker::run() {
       if (const Comdat *SC = GA.getComdat())
         LazyComdatMembers[SC].push_back(&GA);
 
+  dropWeakGVForAsm();
   // Insert all of the globals in src into the DstM module... without linking
   // initializers (which could refer to functions not yet mapped over).
   SmallVector<GlobalValue *, 0> GVToClone;
diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp
index 07f76688fa43e7..d96d2d8caafbae 100644
--- a/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -140,6 +140,13 @@ initializeRecordStreamer(const Module &M,
   Init(Streamer);
 }
 
+bool ModuleSymbolTable::canParseInlineASM(const Module &M) {
+  std::string Err;
+  const Triple TT(M.getTargetTriple());
+  const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
+  return T && T->hasMCAsmParser();
+}
+
 void ModuleSymbolTable::CollectAsmSymbols(
     const Module &M,
     function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) {
diff --git a/llvm/test/LTO/X86/inline-asm-lto-weak.ll b/llvm/test/LTO/X86/inline-asm-lto-weak.ll
index 8b29929c3e5b91..b8d694835d6f7a 100644
--- a/llvm/test/LTO/X86/inline-asm-lto-weak.ll
+++ b/llvm/test/LTO/X86/inline-asm-lto-weak.ll
@@ -7,9 +7,10 @@
 ; RUN: not llvm-lto %t/asm_global.bc %t/fn_global.bc -o %to1.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
 ; RUN: not llvm-lto %t/fn_global.bc %t/asm_global.bc -o %to1.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
 
-; FIXME: We want to drop the weak function.
-; RUN: not llvm-lto %t/asm_global.bc %t/fn_weak.bc -o %to2.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
-; RUN: not llvm-lto %t/fn_weak.bc %t/asm_global.bc -o %to2.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: llvm-lto %t/asm_global.bc %t/fn_weak.bc -o %to2.o --save-linked-module --exported-symbol=foo
+; RUN: llvm-dis < %to2.o.linked.bc | FileCheck %s --check-prefix=ASM_GLOBAL
+; RUN: llvm-lto %t/fn_weak.bc %t/asm_global.bc -o %to2.o --save-linked-module --exported-symbol=foo
+; RUN: llvm-dis < %to2.o.linked.bc | FileCheck %s --check-prefix=ASM_GLOBAL
 
 ; FIXME: We want to drop the weak asm symbol.
 ; RUN: not llvm-lto %t/asm_global.bc %t/asm_weak.bc -o %to3.o --save-linked-module --exported-symbol=foo 2>&1 | FileCheck %s --check-prefix=ERR
@@ -33,6 +34,9 @@
 
 ; FN_GLOBAL: define i32 @foo(i32 %i)
 
+; ASM_GLOBAL: module asm ".global foo"
+; ASM_GLOBAL-NOT: define i32 @foo(i32 %i)
+
 ;--- asm_global.ll
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

>From df5262d987d67bbd93d2a4de95b473688a5e55c7 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sun, 24 Dec 2023 11:14:51 +0800
Subject: [PATCH 3/3] Check if getDiagHandlerPtr() is null

---
 llvm/lib/Object/ModuleSymbolTable.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp
index d96d2d8caafbae..a740606855e522 100644
--- a/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -72,7 +72,8 @@ initializeRecordStreamer(const Module &M,
   // This function may be called twice, once for ModuleSummaryIndexAnalysis and
   // the other when writing the IR symbol table. If parsing inline assembly has
   // caused errors in the first run, suppress the second run.
-  if (M.getContext().getDiagHandlerPtr()->HasErrors)
+  if (M.getContext().getDiagHandlerPtr() &&
+      M.getContext().getDiagHandlerPtr()->HasErrors)
     return;
   StringRef InlineAsm = M.getModuleInlineAsm();
   if (InlineAsm.empty())



More information about the llvm-commits mailing list