[lld] [llvm] [LTO] [LLD] Don't alias the __imp_func and func symbol resolutions (PR #71376)

Martin Storsjö via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 19 02:42:26 PST 2023


https://github.com/mstorsjo updated https://github.com/llvm/llvm-project/pull/71376

>From 138e915176877cb629dc787ad29b1b5b1a964f82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin at martin.st>
Date: Sun, 19 Nov 2023 12:18:49 +0200
Subject: [PATCH 1/2] [LLD] [COFF] Add tests to observe details about LTO and
 __imp_ prefixes. NFC.

The commit 3ab6209a3f93bdbeec8e9b9fcc00a9a4980915ff had the undesired
effect of retaining every __imp_ symbol, even if it isn't referenced
in any way. Add a testcase to observe this behaviour, to serve as
a reference point if this behaviour were to be improved later.

Port the testcase from b963c0b658cc54b370832df4f5a3d63fd69da334 from
the llvm/LTO testsuite into LLD as a preparation for changing that fix;
the moved testcase has a comment for one case which doesn't work
currently.

The testcase is ported mostly as is, but with symbol mangling simplified,
rewriting function names from MSVC C++ mangling to plain C, and
unnecessary debug info is removed.

Also extend the testcase to test combinations of both regular
object files and LTO objects. Leave out one combination which currently
fails, with a comment.
---
 lld/test/COFF/lto-dllimport.ll        | 44 +++++++++++++++++++++++++++
 lld/test/COFF/lto-imp-prefix.ll       | 17 ++++++++++-
 llvm/test/LTO/X86/Inputs/dllimport.ll | 17 -----------
 llvm/test/LTO/X86/dllimport.ll        | 30 ------------------
 4 files changed, 60 insertions(+), 48 deletions(-)
 create mode 100644 lld/test/COFF/lto-dllimport.ll
 delete mode 100644 llvm/test/LTO/X86/Inputs/dllimport.ll
 delete mode 100644 llvm/test/LTO/X86/dllimport.ll

diff --git a/lld/test/COFF/lto-dllimport.ll b/lld/test/COFF/lto-dllimport.ll
new file mode 100644
index 000000000000000..f3606899838b17a
--- /dev/null
+++ b/lld/test/COFF/lto-dllimport.ll
@@ -0,0 +1,44 @@
+; REQUIRES: x86
+; RUN: split-file %s %t.dir
+
+; RUN: llvm-as %t.dir/main.ll -o %t.main.bc
+; RUN: llvm-as %t.dir/other.ll -o %t.other.bc
+; RUN: llc %t.dir/main.ll -o %t.main.obj --filetype=obj
+; RUN: llc %t.dir/other.ll -o %t.other.obj --filetype=obj
+
+; RUN: lld-link %t.main.obj %t.other.obj -entry:main -out:%t.exe
+; RUN: lld-link %t.main.bc  %t.other.bc  -entry:main -out:%t.exe
+; RUN: lld-link %t.main.bc  %t.other.obj -entry:main -out:%t.exe
+
+;; This test currently fails if combining the regular object file main.obj
+;; with the bitcode object file other.bc.
+; RUN-skipped: lld-link %t.main.obj %t.other.bc  -entry:main -out:%t.exe
+
+;--- main.ll
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.11.0"
+
+define dso_local i32 @main() local_unnamed_addr {
+entry:
+  %call = tail call i32 @foo()
+  ret i32 %call
+}
+
+declare dllimport i32 @foo() local_unnamed_addr
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ThinLTO", i32 0}
+
+;--- other.ll
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.11.0"
+
+define dso_local i32 @foo() local_unnamed_addr {
+entry:
+  ret i32 42
+}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ThinLTO", i32 0}
diff --git a/lld/test/COFF/lto-imp-prefix.ll b/lld/test/COFF/lto-imp-prefix.ll
index 503e4fd0649e41d..c0b3bea7841c435 100644
--- a/lld/test/COFF/lto-imp-prefix.ll
+++ b/lld/test/COFF/lto-imp-prefix.ll
@@ -5,7 +5,15 @@
 ; RUN: llvm-as %t.dir/main.ll -o %t.main.obj
 ; RUN: llvm-as %t.dir/other.ll -o %t.other.obj
 
-; RUN: lld-link /entry:entry %t.main.obj %t.other.obj /out:%t.exe /subsystem:console
+; RUN: lld-link /entry:entry %t.main.obj %t.other.obj /out:%t.exe /subsystem:console /debug:symtab
+
+;; The current implementation for handling __imp_ symbols retains all of them.
+;; Observe that this currently produces __imp_unusedFunc even if nothing
+;; references unusedFunc in any form.
+
+; RUN: llvm-nm %t.exe | FileCheck %s
+
+; CHECK: __imp_unusedFunc
 
 ;--- main.ll
 target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
@@ -33,6 +41,13 @@ entry:
   ret void
 }
 
+ at __imp_unusedFunc = global ptr @unusedFuncReplacement
+
+define internal void @unusedFuncReplacement() {
+entry:
+  ret void
+}
+
 define void @other() {
 entry:
   ret void
diff --git a/llvm/test/LTO/X86/Inputs/dllimport.ll b/llvm/test/LTO/X86/Inputs/dllimport.ll
deleted file mode 100644
index f3daab1df09742d..000000000000000
--- a/llvm/test/LTO/X86/Inputs/dllimport.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; ModuleID = 'b.obj'
-source_filename = "b.cpp"
-target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc19.11.0"
-
-; Function Attrs: norecurse nounwind readnone sspstrong uwtable
-define dso_local i32 @"?foo@@YAHXZ"() local_unnamed_addr {
-entry:
-  ret i32 42
-}
-
-!llvm.module.flags = !{!1}
-
-!1 = !{i32 1, !"ThinLTO", i32 0}
-
-^0 = module: (path: "b.obj", hash: (0, 0, 0, 0, 0))
-^1 = gv: (name: "?foo@@YAHXZ", summaries: (function: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1), insts: 1, funcFlags: (readNone: 1, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0)))) ; guid = 2709792123250749187
diff --git a/llvm/test/LTO/X86/dllimport.ll b/llvm/test/LTO/X86/dllimport.ll
deleted file mode 100644
index eb9f7df5c8b30a5..000000000000000
--- a/llvm/test/LTO/X86/dllimport.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; Test requiring LTO to remove the __imp_ prefix for locally imported COFF
-; symbols (mirroring how lld handles these symbols).
-; RUN: llvm-as %s -o %t.obj
-; RUN: llvm-as %S/Inputs/dllimport.ll -o %t2.obj
-; RUN: llvm-lto2 run -r=%t.obj,main,px -r %t.obj,__imp_?foo@@YAHXZ -r %t2.obj,?foo@@YAHXZ,p -o %t3 %t.obj %t2.obj -save-temps
-; RUN: llvm-dis %t3.0.0.preopt.bc -o - | FileCheck %s
-
-; CHECK: define dso_local i32 @"?foo@@YAHXZ"()
-
-; ModuleID = 'a.obj'
-source_filename = "a.cpp"
-target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc19.11.0"
-
-; Function Attrs: norecurse nounwind sspstrong uwtable
-define dso_local i32 @main() local_unnamed_addr {
-entry:
-  %call = tail call i32 @"?foo@@YAHXZ"()
-  ret i32 %call
-}
-
-declare dllimport i32 @"?foo@@YAHXZ"() local_unnamed_addr
-
-!llvm.module.flags = !{!1}
-
-!1 = !{i32 1, !"ThinLTO", i32 0}
-
-^0 = module: (path: "a.obj", hash: (0, 0, 0, 0, 0))
-^1 = gv: (name: "?foo@@YAHXZ") ; guid = 2709792123250749187
-^2 = gv: (name: "main", summaries: (function: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0), calls: ((callee: ^1))))) ; guid = 15822663052811949562

>From 0f0bbb4937c8a7bda63152fb29bfc4f141a84c3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin at martin.st>
Date: Fri, 27 Oct 2023 18:54:00 +0300
Subject: [PATCH 2/2] [LTO] [LLD] Don't alias the __imp_func and func symbol
 resolutions

Commit b963c0b658cc54b370832df4f5a3d63fd69da334 fixed LTO
compilation of cases where one translation unit is calling a
function with the dllimport attribute, and another translation
unit provides this function locally within the same linked module
(i.e. not actually dllimported); see
https://github.com/llvm/llvm-project/issues/37453 or
https://bugs.llvm.org/show_bug.cgi?id=38105 for full context.

This was fixed by aliasing their GlobalResolution structs, for
the __imp_ prefixed and non prefixed symbols.

I believe this fix to be wrong.

This patch reverts that fix, and fixes the same issue differently,
within LLD instead.

The fix assumed that one can treat the __imp_ prefixed and
unprefixed symbols as equal, referencing SVN r240620
(d766653534e0cff702e42a43b44d3057b6094fea). However that referenced
commit had mistaken how this logic works, which was corrected
later in SVN r240622 (88e0f9206b4dccb56dee931adab08f89ff80525a);
those symbols aren't direct aliases for each other - but if
there's a need for the __imp_ prefixed one and the other one
exists, the __imp_ prefixed one is created, as a pointer to
the other one.

However this fix only works if both translation units are compiled
as LTO; if the caller is compiled as a regular object file and
the callee is compiled as LTO, the fix fails, as the LTO compilation
doesn't know that the unprefixed symbol is needed.

The only level that knows of the potential relationship between
the __imp_ prefixed and unprefixed symbol, across regular and
bitcode object files, is LLD itself.

Therefore, revert the original fix from
b963c0b658cc54b370832df4f5a3d63fd69da334, and
fix the issue differently - when concluding that we can
fulfill an undefined symbol starting with __imp_, mark the
corresponding non prefixed symbol as used in a regular object
for the LTO compilation, to make sure that this non prefixed
symbol exists after the LTO compilation, to let LLD do the
fixup of the local import.

Extend the testcase to test a regular object file calling an
LTO object file, which previously failed.

This change also fixes another issue; an object file can provide
both unprefixed and prefixed versions of the same symbol, like
this:

    void importedFunc(void) {
    }
    void (*__imp_importedFunc)(void) = importedFunc;

That allows the function to be called both with and without
dllimport markings. (The concept of automatically resolving
a reference to __imp_func to a locally defined func only is
done in MSVC style linkers, but not in GNU ld, therefore MinGW
mode code often uses this construct.)

Previously, the aliasing of global resolutions at the LTO level
would trigger a failed assert with "Multiple prevailing defs are
not allowed" for this case, as both importedFunc and
__imp_importedFunc could be prevailing. Add a case to the existing
LLD test case lto-imp-prefix.ll to test this as well.

This change (together with previous change in
3ab6209a3f93bdbeec8e9b9fcc00a9a4980915ff) completes LLD to work
with mingw-w64-crt files (the base glue code for a mingw-w64
toolchain) built with LTO.
---
 lld/COFF/SymbolTable.cpp        |  4 +++-
 lld/test/COFF/lto-dllimport.ll  |  5 +----
 lld/test/COFF/lto-imp-prefix.ll | 29 +++++++++++++++++++++++++----
 llvm/lib/LTO/LTO.cpp            |  8 +-------
 4 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 15c76461a84a92c..44aa506d2c35da9 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -462,8 +462,10 @@ void SymbolTable::reportUnresolvable() {
     StringRef name = undef->getName();
     if (name.starts_with("__imp_")) {
       Symbol *imp = find(name.substr(strlen("__imp_")));
-      if (imp && isa<Defined>(imp))
+      if (Defined *def = dyn_cast_or_null<Defined>(imp)) {
+        def->isUsedInRegularObj = true;
         continue;
+      }
     }
     if (name.contains("_PchSym_"))
       continue;
diff --git a/lld/test/COFF/lto-dllimport.ll b/lld/test/COFF/lto-dllimport.ll
index f3606899838b17a..599fc87cf11e1cc 100644
--- a/lld/test/COFF/lto-dllimport.ll
+++ b/lld/test/COFF/lto-dllimport.ll
@@ -9,10 +9,7 @@
 ; RUN: lld-link %t.main.obj %t.other.obj -entry:main -out:%t.exe
 ; RUN: lld-link %t.main.bc  %t.other.bc  -entry:main -out:%t.exe
 ; RUN: lld-link %t.main.bc  %t.other.obj -entry:main -out:%t.exe
-
-;; This test currently fails if combining the regular object file main.obj
-;; with the bitcode object file other.bc.
-; RUN-skipped: lld-link %t.main.obj %t.other.bc  -entry:main -out:%t.exe
+; RUN: lld-link %t.main.obj %t.other.bc  -entry:main -out:%t.exe
 
 ;--- main.ll
 target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/lld/test/COFF/lto-imp-prefix.ll b/lld/test/COFF/lto-imp-prefix.ll
index c0b3bea7841c435..56a7c48cc9d168a 100644
--- a/lld/test/COFF/lto-imp-prefix.ll
+++ b/lld/test/COFF/lto-imp-prefix.ll
@@ -3,18 +3,21 @@
 ; RUN: rm -rf %t.dir
 ; RUN: split-file %s %t.dir
 ; RUN: llvm-as %t.dir/main.ll -o %t.main.obj
-; RUN: llvm-as %t.dir/other.ll -o %t.other.obj
+; RUN: llvm-as %t.dir/other1.ll -o %t.other1.obj
+; RUN: llvm-as %t.dir/other2.ll -o %t.other2.obj
 
-; RUN: lld-link /entry:entry %t.main.obj %t.other.obj /out:%t.exe /subsystem:console /debug:symtab
+; RUN: lld-link /entry:entry %t.main.obj %t.other1.obj /out:%t1.exe /subsystem:console /debug:symtab
 
 ;; The current implementation for handling __imp_ symbols retains all of them.
 ;; Observe that this currently produces __imp_unusedFunc even if nothing
 ;; references unusedFunc in any form.
 
-; RUN: llvm-nm %t.exe | FileCheck %s
+; RUN: llvm-nm %t1.exe | FileCheck %s
 
 ; CHECK: __imp_unusedFunc
 
+; RUN: lld-link /entry:entry %t.main.obj %t.other2.obj /out:%t2.exe /subsystem:console
+
 ;--- main.ll
 target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-w64-windows-gnu"
@@ -30,7 +33,7 @@ declare dllimport void @importedFunc()
 
 declare void @other()
 
-;--- other.ll
+;--- other1.ll
 target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-w64-windows-gnu"
 
@@ -52,3 +55,21 @@ define void @other() {
 entry:
   ret void
 }
+
+;--- other2.ll
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-w64-windows-gnu"
+
+ at __imp_importedFunc = global ptr @importedFunc
+
+; Test with two external symbols with the same name, with/without the __imp_
+; prefix.
+define void @importedFunc() {
+entry:
+  ret void
+}
+
+define void @other() {
+entry:
+  ret void
+}
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index e111e09681178e2..05836fd28f52882 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -610,13 +610,7 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
     assert(ResI != ResE);
     SymbolResolution Res = *ResI++;
 
-    StringRef Name = Sym.getName();
-    // Strip the __imp_ prefix from COFF dllimport symbols (similar to the
-    // way they are handled by lld), otherwise we can end up with two
-    // global resolutions (one with and one for a copy of the symbol without).
-    if (TT.isOSBinFormatCOFF() && Name.startswith("__imp_"))
-      Name = Name.substr(strlen("__imp_"));
-    auto &GlobalRes = GlobalResolutions[Name];
+    auto &GlobalRes = GlobalResolutions[Sym.getName()];
     GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
     if (Res.Prevailing) {
       assert(!GlobalRes.Prevailing &&



More information about the llvm-commits mailing list