[lld] [LLD][COFF] Handle emulated-TLS symbols generated by LTO (PR #145310)

Tomohiro Kashiwada via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 23 04:16:43 PDT 2025


https://github.com/kikairoya created https://github.com/llvm/llvm-project/pull/145310

LLD normally performs auto-import and auto-export analysis before LTO, because LTO needs to know which symbols should be preserved. However, when using emulated-TLS mode, LTO runs an LLVM pass named `LowerEmuTLS`, which transforms the TLS variables. This can result in symbols being added or removed after the initial analysis:

- Symbols generated by `LowerEmuTLS` (e.g. `__emutls_v.var`) are not marked for import or export.
- Original TLS variables (`var`) removed by `LowerEmuTLS` may still be referenced by the GC, leading to `undefined symbol` errors during linking.

This patch introduces special handling around LTO for the emulated-TLS case:

1. Skip checking for unresolvable symbols since the symbol table may be modified by LTO.
2. Skip auto-export before LTO; instead, mark symbols eligible for auto-export as "used" so they are preserved during LTO.
3. If `__emutls_v.var` is marked as used, also mark `var` as used, so it isn't prematurely discarded by LTO.
4. Re-run auto-importing after LTO, as the generated emulated-TLS symbols may need to be auto-imported.
5. Run auto-exporting after post-LTO auto-importing, to ensure newly generated symbols are exported properly.

These changes are applied only when both LTO and emulated TLS are enabled.

Resolves https://github.com/llvm/llvm-project/issues/145195 https://github.com/llvm/llvm-project/issues/79715

>From 03e5e92c552d0173aaa8efa57c28abe9520e7315 Mon Sep 17 00:00:00 2001
From: kikairoya <kikairoya at gmail.com>
Date: Mon, 23 Jun 2025 01:03:44 +0900
Subject: [PATCH] [LLD][COFF] Handle emulated-TLS symbols generated by LTO

LLD normally performs auto-import and auto-export analysis before LTO,
because LTO needs to know which symbols should be preserved.
However, when using emulated-TLS mode, LTO runs an LLVM pass named
`LowerEmuTLS`, which transforms the TLS variables.
This can result in symbols being added or removed after the initial analysis:

- Symbols generated by `LowerEmuTLS` (e.g. `__emutls_v.var`) are not marked for import or export.
- Original TLS variables (`var`) removed by `LowerEmuTLS` may still be referenced by the GC, leading to `undefined symbol` errors during linking.

This patch introduces special handling around LTO for the emulated-TLS case:

1. Skip checking for unresolvable symbols since the symbol table may be modified by LTO.
2. Skip auto-export before LTO; instead, mark symbols eligible for auto-export as "used" so they are preserved during LTO.
3. If `__emutls_v.var` is marked as used, also mark `var` as used, so it isn't prematurely discarded by LTO.
4. Re-run auto-importing after LTO, as the generated emulated-TLS symbols may need to be auto-imported.
5. Run auto-exporting after post-LTO auto-importing, to ensure newly generated symbols are exported properly.

These changes are applied only when both LTO and emulated TLS are enabled.
---
 lld/COFF/Driver.cpp             |  52 ++++++++++---
 lld/COFF/Driver.h               |   3 +-
 lld/test/COFF/lto-emutls-dll.ll | 133 ++++++++++++++++++++++++++++++++
 3 files changed, 178 insertions(+), 10 deletions(-)
 create mode 100644 lld/test/COFF/lto-emutls-dll.ll

diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index f3240b22a1442..bdd0886937c04 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1384,7 +1384,8 @@ void LinkerDriver::pullArm64ECIcallHelper() {
 // explicitly specified. The automatic behavior can be disabled using the
 // -exclude-all-symbols option, so that lld-link behaves like link.exe rather
 // than MinGW in the case that nothing is explicitly exported.
-void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) {
+void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args,
+                                           bool preLTOCheck) {
   if (!args.hasArg(OPT_export_all_symbols)) {
     if (!ctx.config.dll)
       return;
@@ -1415,7 +1416,7 @@ void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) {
       if (!exporter.shouldExport(def))
         return;
 
-      if (!def->isGCRoot) {
+      if (!preLTOCheck && !def->isGCRoot) {
         def->isGCRoot = true;
         ctx.config.gcroot.push_back(def);
       }
@@ -1427,7 +1428,8 @@ void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) {
         if (!(c->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE))
           e.data = true;
       s->isUsedInRegularObj = true;
-      symtab.exports.push_back(e);
+      if (!preLTOCheck)
+        symtab.exports.push_back(e);
     });
   });
 }
@@ -2580,6 +2582,16 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
     ctx.forEachActiveSymtab(
         [&](SymbolTable &symtab) { symtab.addUndefinedGlob(pat); });
 
+  // The combination of LTO and emulated TLS needs special handling.
+  const bool isEmuTLS = llvm::is_contained(config->mllvmOpts, "-emulated-tls");
+  const bool isLTO = [&]() {
+    bool isLTO = false;
+    ctx.forEachSymtab([&](SymbolTable &symtab) {
+      isLTO |= !symtab.bitcodeFileInstances.empty();
+    });
+    return isLTO;
+  }();
+
   // Create wrapped symbols for -wrap option.
   ctx.forEachSymtab([&](SymbolTable &symtab) {
     addWrappedSymbols(symtab, args);
@@ -2618,11 +2630,11 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   // If we are going to do codegen for link-time optimization, check for
   // unresolvable symbols first, so we don't spend time generating code that
   // will fail to link anyway.
-  if (!config->forceUnresolved)
-    ctx.forEachSymtab([](SymbolTable &symtab) {
-      if (!symtab.bitcodeFileInstances.empty())
-        symtab.reportUnresolvable();
-    });
+  // This check cannot be applied if emulated TLS is enabled, since LTO invokes
+  // LLVM passes that modify the symbol set.
+  if (isLTO && !isEmuTLS && !config->forceUnresolved)
+    ctx.forEachSymtab(
+        [&](SymbolTable &symtab) { symtab.reportUnresolvable(); });
   if (errorCount())
     return;
 
@@ -2632,7 +2644,20 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   if (config->mingw) {
     // In MinGW, all symbols are automatically exported if no symbols
     // are chosen to be exported.
-    maybeExportMinGWSymbols(args);
+    maybeExportMinGWSymbols(args, isLTO && isEmuTLS);
+  }
+
+  // retain VAR if __emutls_v.VAR is retained
+  if (isLTO && isEmuTLS) {
+    ctx.forEachActiveSymtab([&](SymbolTable &symtab) {
+      symtab.forEachSymbol([&](Symbol *s) {
+        auto name = s->getName();
+        if (!name.starts_with("__emutls_v.") || !s->isUsedInRegularObj)
+          return;
+        if (Symbol *tls = symtab.find(name.substr(strlen("__emutls_v."))))
+          tls->isUsedInRegularObj = true;
+      });
+    });
   }
 
   // Do LTO by compiling bitcode input files to a set of native COFF files then
@@ -2641,6 +2666,15 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   ltoCompilationDone = true;
   ctx.forEachSymtab([](SymbolTable &symtab) { symtab.compileBitcodeFiles(); });
 
+  // When emulated TLS is enabled, LTO modifies the symbol set, so a second
+  // auto-import/auto-export pass is needed here.
+  if (isLTO && isEmuTLS && config->mingw) {
+    if (config->autoImport || config->stdcallFixup) {
+      ctx.forEachSymtab([](SymbolTable &symtab) { symtab.loadMinGWSymbols(); });
+    }
+    maybeExportMinGWSymbols(args, false);
+  }
+
   if (Defined *d =
           dyn_cast_or_null<Defined>(ctx.symtab.findUnderscore("_tls_used")))
     config->gcroot.push_back(d);
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index 14c97a98875bf..80c4a032d0bfc 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -161,7 +161,8 @@ class LinkerDriver {
   // trees into one resource tree.
   void convertResources();
 
-  void maybeExportMinGWSymbols(const llvm::opt::InputArgList &args);
+  void maybeExportMinGWSymbols(const llvm::opt::InputArgList &args,
+                               bool preLTOCheck);
 
   // We don't want to add the same file more than once.
   // Files are uniquified by their filesystem and file number.
diff --git a/lld/test/COFF/lto-emutls-dll.ll b/lld/test/COFF/lto-emutls-dll.ll
new file mode 100644
index 0000000000000..3014cc0f9f811
--- /dev/null
+++ b/lld/test/COFF/lto-emutls-dll.ll
@@ -0,0 +1,133 @@
+; REQUIRES: x86
+; RUN: split-file %s %t.dir
+
+; RUN: llvm-as %t.dir/main.ll -o %t.main.bc
+; RUN: llvm-as %t.dir/other.ll -o %t.other.bc
+; RUN: llc --emulated-tls %t.dir/main.ll -o %t.main.obj --filetype=obj
+; RUN: llc --emulated-tls %t.dir/other.ll -o %t.other.obj --filetype=obj
+; RUN: llc --emulated-tls %t.dir/runtime.ll -o %t.runtime.obj --filetype=obj
+
+; RUN: ld.lld -m i386pep -plugin-opt=mcpu=x86-64 -plugin-opt=-emulated-tls %t.main.obj %t.other.obj %t.runtime.obj -entry __main -o %t.exe
+; RUN: ld.lld -m i386pep -plugin-opt=mcpu=x86-64 -plugin-opt=-emulated-tls %t.main.bc  %t.other.bc  %t.runtime.obj -entry __main -o %t.exe
+; RUN: ld.lld -m i386pep -plugin-opt=mcpu=x86-64 -plugin-opt=-emulated-tls %t.main.bc  %t.other.obj %t.runtime.obj -entry __main -o %t.exe
+; RUN: ld.lld -m i386pep -plugin-opt=mcpu=x86-64 -plugin-opt=-emulated-tls %t.main.obj %t.other.bc  %t.runtime.obj -entry __main -o %t.exe
+; RUN: ld.lld -m i386pep -plugin-opt=mcpu=x86-64 -plugin-opt=-emulated-tls -shared %t.other.bc %t.runtime.obj -o %t.dll --out-implib=%t.lib
+; RUN: llvm-readobj --coff-exports %t.dll | FileCheck %s
+; RUN: ld.lld -m i386pep -plugin-opt=mcpu=x86-64 -plugin-opt=-emulated-tls %t.main.bc %t.dll %t.runtime.obj -entry __main -o %t.exe
+; RUN: llvm-readobj --coff-imports %t.exe | FileCheck %s
+; RUN: ld.lld -m i386pep -plugin-opt=mcpu=x86-64 -plugin-opt=-emulated-tls %t.main.bc %t.lib %t.runtime.obj -entry __main -o %t.exe
+; RUN: llvm-readobj --coff-imports %t.exe | FileCheck %s
+
+; CHECK: _Z11set_tls_vari
+; CHECK: __emutls_v.tls_var
+
+;--- main.ll
+;; generated from:
+;;;extern int thread_local tls_var;
+;;;void set_tls_var(int v);
+;;;int main(int argc, char **argv) {
+;;;  set_tls_var(3);
+;;;  return tls_var == argc;
+;;;}
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-w64-windows-gnu"
+
+$_ZTW7tls_var = comdat any
+
+@tls_var = external dso_local thread_local global i32, align 4
+
+; Function Attrs: mustprogress noinline norecurse optnone uwtable
+define dso_local noundef i32 @__main(i32 noundef %0, ptr noundef %1) #0 {
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  %5 = alloca ptr, align 8
+  store i32 0, ptr %3, align 4
+  store i32 %0, ptr %4, align 4
+  store ptr %1, ptr %5, align 8
+  call void @_Z11set_tls_vari(i32 noundef 3)
+  %6 = call ptr @_ZTW7tls_var()
+  %7 = load i32, ptr %6, align 4
+  %8 = load i32, ptr %4, align 4
+  %9 = icmp eq i32 %7, %8
+  %10 = zext i1 %9 to i32
+  ret i32 %10
+}
+
+declare dso_local void @_Z11set_tls_vari(i32 noundef) #1
+
+; Function Attrs: noinline uwtable
+define linkonce_odr hidden noundef ptr @_ZTW7tls_var() #2 comdat {
+  %1 = icmp ne ptr @_ZTH7tls_var, null
+  br i1 %1, label %2, label %3
+
+2:                                                ; preds = %0
+  call void @_ZTH7tls_var()
+  br label %3
+
+3:                                                ; preds = %2, %0
+  %4 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @tls_var)
+  ret ptr %4
+}
+
+declare extern_weak void @_ZTH7tls_var() #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #3
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ThinLTO", i32 0}
+
+;--- other.ll
+;; generated from:
+;;;int thread_local tls_var;
+;;;void set_tls_var(int v) { tls_var = v; }
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-w64-windows-gnu"
+
+$_ZTW7tls_var = comdat any
+
+@tls_var = dso_local thread_local global i32 0, align 4
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define dso_local void @_Z11set_tls_vari(i32 noundef %0) #0 {
+  %2 = alloca i32, align 4
+  store i32 %0, ptr %2, align 4
+  %3 = load i32, ptr %2, align 4
+  %4 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @tls_var)
+  store i32 %3, ptr %4, align 4
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1
+
+; Function Attrs: noinline uwtable
+define weak_odr hidden noundef ptr @_ZTW7tls_var() #2 comdat {
+  %1 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @tls_var)
+  ret ptr %1
+}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ThinLTO", i32 0}
+
+;--- runtime.ll
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-w64-windows-gnu"
+
+define dso_local hidden noundef i32 @__emutls_get_address() {
+  ret i32 0
+}
+
+define dso_local hidden noundef i32 @__emutls_register_common() {
+  ret i32 0
+}
+
+define dso_local hidden noundef i32 @_pei386_runtime_relocator() {
+  ret i32 0
+}
+
+define dso_local hidden noundef i32 @_DllMainCRTStartup() {
+  ret i32 0
+}



More information about the llvm-commits mailing list