[lld] [ELF] Add --thinlto-index= (PR #109534)

via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 21 09:35:44 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld-elf

Author: Fangrui Song (MaskRay)

<details>
<summary>Changes</summary>

--thinlto-index= is designed to replace --thinlto-index-only= for distributed
ThinLTO. Example:

Index files and backend compile files are in the same directory:
```
echo 'int g() { return 0; }' > a.c
echo 'int f(); int main() { return f(); }' > b.c
echo 'int g(); int f() { return g(); }' > c.c
echo '' > d.c
mkdir -p thin obj

clang -c -flto=thin -O2 b.c -o thin/b.o -fthin-link-bitcode=thin/b.min.o
clang -c -flto=thin -O2 c.c -o thin/c.o -fthin-link-bitcode=thin/c.min.o
clang -c -flto=thin -O2 d.c -o thin/d.o -fthin-link-bitcode=thin/d.min.o
clang -c -O2 a.c -o a.o

clang -flto=thin -fuse-ld=lld -Wl,--thinlto-index=obj/exe.map,--thinlto-emit-imports-files,--thinlto-prefix-replace='thin;obj',--thinlto-object-suffix-replace='.min.o;.o' \
  a.o thin/b.min.o -Wl,--start-lib thin/c.min.o thin/d.min.o -Wl,--end-lib

clang -c -O2 -fthinlto-index=obj/b.o.thinlto.bc thin/b.o -o obj/b.o
clang -c -O2 -fthinlto-index=obj/c.o.thinlto.bc thin/c.o -o obj/c.o
clang -c -O2 -fthinlto-index=obj/d.o.thinlto.bc thin/d.o -o obj/d.o

clang -fuse-ld=lld -Wl,--remap-inputs-file=obj/exe.map a.o thin/b.min.o -Wl,--start-lib thin/c.min.o thin/d.min.o -Wl,--end-lib -o obj/exe
```

Index files and backend compile files are in different directories:
```
echo 'int g() { return 0; }' > a.c
echo 'int f(); int main() { return f(); }' > b.c
echo 'int g(); int f() { return g(); }' > c.c
echo '' > d.c
mkdir -p thin index obj

clang -c -flto=thin -O2 b.c -o thin/b.o -fthin-link-bitcode=thin/b.min.o
clang -c -flto=thin -O2 c.c -o thin/c.o -fthin-link-bitcode=thin/c.min.o
clang -c -flto=thin -O2 d.c -o thin/d.o -fthin-link-bitcode=thin/d.min.o
clang -c -O2 a.c -o a.o

clang -flto=thin -fuse-ld=lld -Wl,--thinlto-index=obj/exe.map,--thinlto-emit-imports-files,--thinlto-prefix-replace='thin;index;obj',--thinlto-object-suffix-replace='.min.o;.o' \
  a.o thin/b.min.o -Wl,--start-lib thin/c.min.o thin/d.min.o -Wl,--end-lib

clang -c -O2 -fthinlto-index=index/b.o.thinlto.bc thin/b.o -o obj/b.o
clang -c -O2 -fthinlto-index=index/c.o.thinlto.bc thin/c.o -o obj/c.o
clang -c -O2 -fthinlto-index=index/d.o.thinlto.bc thin/d.o -o obj/d.o

clang -fuse-ld=lld -Wl,--remap-inputs-file=obj/exe.map a.o thin/b.min.o -Wl,--start-lib thin/c.min.o thin/d.min.o -Wl,--end-lib -o obj/exe
```

The ThinLTO indexing and the final link have very similar command lines.
Actually we just need to replace `-Wl,--thinlto-index=` with `-Wl,--remap-inputs-file=`.
`obj/exe.map` redirects input minimized bitcode files to backend compilation outputs.

---

Here is an example of the old way (`--thinlto-index-only=`).
```
clang -flto=thin -fuse-ld=lld -Wl,--thinlto-index-only=obj/exe.params,--thinlto-prefix-replace='thin;obj',--thinlto-object-suffix-replace='.min.o;.o' \
  a.o thin/b.min.o -Wl,--start-lib thin/c.min.o thin/d.min.o -Wl,--end-lib
...
clang -fuse-ld=lld @obj/exe.params a.o -o obj/exe
```

--thinlto-index-only= specifies a response file containing all the native object
files produced by backend compilation. The final link command line drops all
bitcode files (the build system recognizes these files) and inserts `@obj/exe.params`
at the beginning. This reordering may cause different symbol resolution results
for ThinLTO indexing and the final link, which may lead to:

* a different --start-lib native object file is picked
* spurious "undefined symbol": say, the different native object file may call a function defined
  in a bitcode file. The backend compilation for the bitcode file may not export the definition
  since it doesn't know it is referenced by this native object file.

Rejected alternative: let the response file include native object files.
This has complications:

* lld has to serialize all input files (shared objects, archives, linker scripts, etc) as well as --whole-archive/--as-needed states
* the build system has to recognize input files (including linker script) and remove them
* -nostdlib is required

Link: https://discourse.llvm.org/t/distributed-thinlto-final-linking-order/63804


---
Full diff: https://github.com/llvm/llvm-project/pull/109534.diff


8 Files Affected:

- (modified) lld/ELF/Config.h (+1) 
- (modified) lld/ELF/Driver.cpp (+7-4) 
- (modified) lld/ELF/LTO.cpp (+22) 
- (modified) lld/ELF/Options.td (+1) 
- (modified) lld/docs/ReleaseNotes.rst (+4) 
- (added) lld/test/ELF/lto/thinlto-index-file.ll (+30) 
- (added) lld/test/ELF/lto/thinlto-index.ll (+108) 
- (modified) lld/test/ELF/lto/thinlto-single-module.ll (+11) 


``````````diff
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 7cae8677ef5ce1..a0a8f1c239161b 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -215,6 +215,7 @@ struct Config {
   llvm::StringRef soName;
   llvm::StringRef sysroot;
   llvm::StringRef thinLTOCacheDir;
+  llvm::StringRef thinLTOIndex;
   llvm::StringRef thinLTOIndexOnlyArg;
   llvm::StringRef whyExtract;
   llvm::StringRef cmseInputLib;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index e25db0e4951275..e7b34661848c3b 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1449,10 +1449,13 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
       parseCachePruningPolicy(args.getLastArgValue(OPT_thinlto_cache_policy)),
       "--thinlto-cache-policy: invalid cache policy");
   ctx.arg.thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files);
-  ctx.arg.thinLTOEmitIndexFiles = args.hasArg(OPT_thinlto_emit_index_files) ||
+  ctx.arg.thinLTOIndex = args.getLastArgValue(OPT_thinlto_index);
+  ctx.arg.thinLTOEmitIndexFiles = ctx.arg.thinLTOIndex.size() ||
+                                  args.hasArg(OPT_thinlto_emit_index_files) ||
                                   args.hasArg(OPT_thinlto_index_only) ||
                                   args.hasArg(OPT_thinlto_index_only_eq);
-  ctx.arg.thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) ||
+  ctx.arg.thinLTOIndexOnly = ctx.arg.thinLTOIndex.size() ||
+                             args.hasArg(OPT_thinlto_index_only) ||
                              args.hasArg(OPT_thinlto_index_only_eq);
   ctx.arg.thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_eq);
   ctx.arg.thinLTOObjectSuffixReplace =
@@ -1469,7 +1472,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
             "--thinlto-emit-index-files");
   }
   if (!ctx.arg.thinLTOPrefixReplaceNativeObject.empty() &&
-      ctx.arg.thinLTOIndexOnlyArg.empty()) {
+      ctx.arg.thinLTOIndex.empty() && ctx.arg.thinLTOIndexOnlyArg.empty()) {
     error("--thinlto-prefix-replace=old_dir;new_dir;obj_dir must be used with "
           "--thinlto-index-only=");
   }
@@ -2983,7 +2986,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
 
   // Skip the normal linked output if some LTO options are specified.
   //
-  // For --thinlto-index-only, index file creation is performed in
+  // For --thinlto-index{,-only}, index file creation is performed in
   // compileBitcodeFiles, so we are done afterwards. --plugin-opt=emit-llvm and
   // --plugin-opt=emit-asm create output files in bitcode or assembly code,
   // respectively. When only certain thinLTO modules are specified for
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index f339f1c2c0ec21..2398b36dad4f72 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -197,6 +197,28 @@ BitcodeCompiler::BitcodeCompiler() {
       createConfig(), backend, config->ltoPartitions,
       ltoModes[config->ltoKind]);
 
+  // Write a remapping file for the final native link. Non-lazy bitcode files
+  // are mapped to native object files. If a lazy bitcode file is a minimized
+  // bitcode file, it cannot participate in the final link. Print /dev/null to
+  // ignore it.
+  if (!ctx.arg.thinLTOIndex.empty()) {
+    if (auto os = openFile(ctx.arg.thinLTOIndex)) {
+      for (BitcodeFile *file : ctx.bitcodeFiles) {
+        StringRef nativeDir = ctx.arg.thinLTOPrefixReplaceNativeObject.empty()
+                               ? ctx.arg.thinLTOPrefixReplaceNew
+                               : ctx.arg.thinLTOPrefixReplaceNativeObject;
+        *os << file->getName() << '='
+            << lto::getThinLTOOutputFile(replaceThinLTOSuffix(file->getName()),
+                                         ctx.arg.thinLTOPrefixReplaceOld,
+                                         nativeDir)
+            << '\n';
+      }
+      for (BitcodeFile *file : ctx.lazyBitcodeFiles)
+        if (file->lazy)
+          *os << file->getName() << "=/dev/null\n";
+    }
+  }
+
   // Initialize usedStartStop.
   if (ctx.bitcodeFiles.empty())
     return;
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c80c4017d3512c..6de246fe02c165 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -676,6 +676,7 @@ def thinlto_cache_dir: JJ<"thinlto-cache-dir=">,
 defm thinlto_cache_policy: EEq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">;
 def thinlto_emit_imports_files: FF<"thinlto-emit-imports-files">;
 def thinlto_emit_index_files: FF<"thinlto-emit-index-files">;
+def thinlto_index: JJ<"thinlto-index=">;
 def thinlto_index_only: FF<"thinlto-index-only">;
 def thinlto_index_only_eq: JJ<"thinlto-index-only=">;
 def thinlto_jobs_eq: JJ<"thinlto-jobs=">,
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 6d09de10e7195e..0f0396bc71e2e8 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -36,6 +36,10 @@ ELF Improvements
   increases the expressive power of linker scripts.
   (`#95323 <https://github.com/llvm/llvm-project/pull/95323>`_)
 
+* Experimental ``--thinlto-index=`` is added for distributed ThinLTO,
+  addressing some symbol resolution issues with the old ``--thinlto-index-only=``.
+  The ThinLTO indexing and the final link have very similar command lines.
+
 Breaking changes
 ----------------
 
diff --git a/lld/test/ELF/lto/thinlto-index-file.ll b/lld/test/ELF/lto/thinlto-index-file.ll
new file mode 100644
index 00000000000000..0384527af0a858
--- /dev/null
+++ b/lld/test/ELF/lto/thinlto-index-file.ll
@@ -0,0 +1,30 @@
+; REQUIRES: x86
+;; Test --thinlto-index-only= for distributed ThinLTO.
+;; This option is discouraged in favor of --thinlto-index=.
+
+; RUN: rm -rf %t && mkdir %t && cd %t
+; RUN: opt -module-summary %s -o 1.o
+; RUN: opt -module-summary %p/Inputs/thinlto.ll -o 2.o
+; RUN: opt -module-summary %p/Inputs/thinlto_empty.ll -o 3.o
+
+;; Ensure lld writes linked files to linked objects file
+; RUN: ld.lld --plugin-opt=thinlto-index-only=1.txt -shared 1.o 2.o 3.o -o /dev/null
+; RUN: FileCheck %s < 1.txt
+; CHECK: 1.o
+; CHECK: 2.o
+; CHECK: 3.o
+
+;; Check that this also works without the --plugin-opt= prefix.
+; RUN: ld.lld --thinlto-index-only=2.txt -shared 1.o 2.o 3.o -o /dev/null
+; RUN: diff 1.txt 2.txt
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @g(...)
+
+define void @f() {
+entry:
+  call void (...) @g()
+  ret void
+}
diff --git a/lld/test/ELF/lto/thinlto-index.ll b/lld/test/ELF/lto/thinlto-index.ll
new file mode 100644
index 00000000000000..68e6eb4f1ef829
--- /dev/null
+++ b/lld/test/ELF/lto/thinlto-index.ll
@@ -0,0 +1,108 @@
+; REQUIRES: x86
+;; Test --thinlto-index= for distributed ThinLTO.
+
+; RUN: rm -rf %t && split-file %s %t && mkdir %t/thin %t/index && cd %t
+; RUN: llvm-mc -filetype=obj -triple=x86_64 main.s -o main.o
+; RUN: llvm-mc -filetype=obj -triple=x86_64 c.s -o c.o
+; RUN: opt -thinlto-bc a.ll -o thin/a.o -thin-link-bitcode-file=thin/a.min.o
+; RUN: opt -thinlto-bc bc.ll -o thin/bc.o -thin-link-bitcode-file=thin/bc.min.o
+; RUN: opt -thinlto-bc %p/Inputs/thinlto_empty.ll -o thin/empty.o -thin-link-bitcode-file=thin/empty.min.o
+; RUN: opt -thinlto-bc %p/Inputs/thinlto_empty.ll -o thin/empty1.o -thin-link-bitcode-file=thin/empty1.min.o
+
+; RUN: ld.lld --thinlto-index=1.map --thinlto-emit-imports-files --thinlto-prefix-replace='thin;obj' \
+; RUN:   --thinlto-object-suffix-replace='.min.o;.o' main.o thin/a.min.o \
+; RUN:   --start-lib thin/bc.min.o thin/empty.min.o --end-lib thin/empty1.min.o --start-lib c.o --end-lib -o 1
+; RUN: FileCheck --input-file=1.map %s --implicit-check-not={{.}} --match-full-lines --strict-whitespace
+
+;; No entry for empty.min.o which is not extracted. empty1.min.o is present,
+;; otherwise the final link may try in-process ThinLTO backend compilation.
+;      CHECK:thin/a.min.o=obj/a.o
+; CHECK-NEXT:thin/bc.min.o=obj/bc.o
+; CHECK-NEXT:thin/empty1.min.o=obj/empty1.o
+; CHECK-NEXT:thin/empty.min.o=/dev/null
+
+;; Nevertheless, empty.o.{imports,thinlto.bc} exist to meet the build system requirement.
+; RUN: ls obj/a.o.imports obj/bc.o.imports obj/empty.o.imports obj/empty1.o.imports
+
+; RUN: llvm-bcanalyzer -dump obj/a.o.thinlto.bc | FileCheck %s --check-prefix=BACKENDA
+; RUN: llvm-bcanalyzer -dump obj/bc.o.thinlto.bc | FileCheck %s --check-prefix=BACKENDB
+; RUN: llvm-bcanalyzer -dump obj/empty.o.thinlto.bc | FileCheck %s --check-prefix=BACKENDE
+; RUN: llvm-bcanalyzer -dump obj/empty1.o.thinlto.bc | FileCheck %s --check-prefix=BACKENDE1
+
+; BACKENDA:       <MODULE_STRTAB_BLOCK
+; BACKENDA-NEXT:  <ENTRY {{.*}} record string = 'thin/a.o'
+; BACKENDA-NEXT:  <HASH
+; BACKENDA-NEXT:  <ENTRY {{.*}} record string = 'thin/bc.o'
+; BACKENDA-NEXT:  <HASH
+; BACKENDA-NEXT:  </MODULE_STRTAB_BLOCK>
+
+; BACKENDB:       <MODULE_STRTAB_BLOCK
+; BACKENDB-NEXT:  <ENTRY {{.*}} record string = 'thin/bc.o'
+; BACKENDB-NEXT:  <HASH
+; BACKENDB-NEXT:  </MODULE_STRTAB_BLOCK>
+
+; BACKENDE:       <MODULE_STRTAB_BLOCK
+; BACKENDE-NEXT:  </MODULE_STRTAB_BLOCK>
+
+; BACKENDE1:      <MODULE_STRTAB_BLOCK
+; BACKENDE1-NEXT: <ENTRY {{.*}} record string = 'thin/empty1.o'
+; BACKENDE1-NEXT: <HASH
+; BACKENDE1-NEXT: </MODULE_STRTAB_BLOCK>
+
+;; Thin archives can be used as well.
+; RUN: llvm-ar rcTS thin.a thin/bc.min.o thin/empty.min.o
+; RUN: ld.lld --thinlto-index=2.map --thinlto-prefix-replace='thin;obj' \
+; RUN:   --thinlto-object-suffix-replace='.min.o;.o' main.o \
+; RUN:   thin/a.min.o thin.a thin/empty1.min.o -o 2
+; RUN: FileCheck --input-file=2.map %s --implicit-check-not={{.}} --match-full-lines --strict-whitespace
+
+;; For regular archives, the filename may be insufficient to locate the archive and the particular member.
+; RUN: llvm-ar rcS bc.a thin/bc.min.o thin/empty.min.o
+; RUN: ld.lld --thinlto-index=3.map --thinlto-prefix-replace='thin;obj' \
+; RUN:   --thinlto-object-suffix-replace='.min.o;.o' main.o \
+; RUN:   thin/a.min.o bc.a thin/empty1.min.o -o 3
+; RUN: FileCheck --input-file=3.map %s --check-prefix=ARCHIVE --implicit-check-not={{.}} --match-full-lines --strict-whitespace
+
+;      ARCHIVE:thin/a.min.o=obj/a.o
+; ARCHIVE-NEXT:bc.min.o=bc.o
+; ARCHIVE-NEXT:thin/empty1.min.o=obj/empty1.o
+; ARCHIVE-NEXT:empty.min.o=/dev/null
+
+; RUN: ld.lld --thinlto-index=4.map --thinlto-emit-imports-files --thinlto-prefix-replace='thin;index;obj' \
+; RUN:   --thinlto-object-suffix-replace='.min.o;.o' main.o thin/a.min.o \
+; RUN:   --start-lib thin/bc.min.o thin/empty.min.o --end-lib thin/empty1.min.o --start-lib c.o --end-lib -o 4
+; RUN: FileCheck --input-file=4.map %s --implicit-check-not={{.}} --match-full-lines --strict-whitespace
+; RUN: ls index/a.o.thinlto.bc index/a.o.imports
+
+;--- a.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @b()
+declare void @c()
+
+define void @a() {
+  call void () @b()
+  call void () @c()
+  ret void
+}
+
+;--- bc.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @b() {
+  ret void
+}
+define void @c() {
+  ret void
+}
+
+;--- main.s
+.globl _start
+_start:
+  call a
+
+;--- c.s
+.globl c
+c:
diff --git a/lld/test/ELF/lto/thinlto-single-module.ll b/lld/test/ELF/lto/thinlto-single-module.ll
index 0530213193510f..934766480b3107 100644
--- a/lld/test/ELF/lto/thinlto-single-module.ll
+++ b/lld/test/ELF/lto/thinlto-single-module.ll
@@ -48,9 +48,20 @@
 ; RUN: ls | FileCheck --implicit-check-not='thin.{{.*}}.thinlto.bc' /dev/null
 ; RUN: FileCheck %s --check-prefix=IDX < single5.idx
 ; RUN: count 1 < single5.idx
+; RUN: rm main.o.thinlto.bc
 
 ; IDX: main.o
 
+; RUN: ld.lld main.o thin.a --thinlto-single-module=main.o --thinlto-index=single5.map
+; RUN: ls main.o.thinlto.bc
+; RUN: ls | FileCheck --implicit-check-not='thin.{{.*}}.thinlto.bc' /dev/null
+; RUN: FileCheck --input-file=single5.map %s --check-prefix=REMAP --implicit-check-not={{.}}
+
+;; Currently the --thinlto-index= file is not affected by --thinlto-single-module.
+; REMAP:      main.o=main.o
+; REMAP-NEXT: thin1.o=thin1.o
+; REMAP-NEXT: thin2.o=thin2.o
+
 ;; Check temporary output generated for main.o only.
 ; RUN: ld.lld main.o thin.a --thinlto-single-module=main.o --save-temps
 ; RUN: ls main.o.0.preopt.bc

``````````

</details>


https://github.com/llvm/llvm-project/pull/109534


More information about the llvm-commits mailing list