[lld] [LLD][COFF] add __{data,bss}_{start,end}__ symbols for Cygwin support (PR #136180)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 23 11:05:03 PDT 2025
https://github.com/jeremyd2019 updated https://github.com/llvm/llvm-project/pull/136180
>From 05747de8c8ee350edf90f8de3560ccd82de61649 Mon Sep 17 00:00:00 2001
From: Jeremy Drake <github at jdrake.com>
Date: Sun, 13 Apr 2025 22:41:05 -0700
Subject: [PATCH] [LLD][COFF] add __{data,bss}_{start,end}__ symbols for Cygwin
support
Cygwin requires these symbols for its fork emulation to know what data
to copy into the child. GNU ld defines these symbols for MinGW targets
also, so do the same here.
Cygwin also has the .data_cygwin_nocopy section, which is merged into
.data but placed outside the __data_start__ to __data_end__ range, which
excludes it from fork's copying. AFAIK it's only used by the Cygwin DLL
itself (which requires a custom linker script to link, something LLD does
not support), but the section is included in GNU ld's default linker
script, so handle it here too.
Signed-off-by: Jeremy Drake <github at jdrake.com>
---
lld/COFF/Driver.cpp | 5 ++
lld/COFF/Writer.cpp | 33 ++++++++++-
lld/test/COFF/cygwin-symbols.s | 100 +++++++++++++++++++++++++++++++++
3 files changed, 136 insertions(+), 2 deletions(-)
create mode 100644 lld/test/COFF/cygwin-symbols.s
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 7aa13bdce488e..72e25634c19d8 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2039,6 +2039,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
parseMerge(".ctors=.rdata");
parseMerge(".dtors=.rdata");
parseMerge(".CRT=.rdata");
+ parseMerge(".data_cygwin_nocopy=.data");
}
// Handle /section
@@ -2495,6 +2496,10 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (config->mingw) {
symtab.addAbsolute(symtab.mangle("__CTOR_LIST__"), 0);
symtab.addAbsolute(symtab.mangle("__DTOR_LIST__"), 0);
+ symtab.addAbsolute("__data_start__", 0);
+ symtab.addAbsolute("__data_end__", 0);
+ symtab.addAbsolute("__bss_start__", 0);
+ symtab.addAbsolute("__bss_end__", 0);
}
if (config->debug || config->buildIDHash != BuildIDHash::None)
if (symtab.findUnderscore("__buildid"))
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 6ed1f884a9636..32a480e3126e2 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -239,6 +239,7 @@ class Writer {
void createRuntimePseudoRelocs();
void createECChunks();
void insertCtorDtorSymbols();
+ void insertBssDataStartEndSymbols();
void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols);
void createGuardCFTables();
void markSymbolsForRVATable(ObjFile *file,
@@ -314,6 +315,7 @@ class Writer {
OutputSection *textSec;
OutputSection *hexpthkSec;
+ OutputSection *bssSec;
OutputSection *rdataSec;
OutputSection *buildidSec;
OutputSection *dataSec;
@@ -1077,7 +1079,7 @@ void Writer::createSections() {
textSec = createSection(".text", code | r | x);
if (isArm64EC(ctx.config.machine))
hexpthkSec = createSection(".hexpthk", code | r | x);
- createSection(".bss", bss | r | w);
+ bssSec = createSection(".bss", bss | r | w);
rdataSec = createSection(".rdata", data | r);
buildidSec = createSection(".buildid", data | r);
dataSec = createSection(".data", data | r | w);
@@ -1260,8 +1262,10 @@ void Writer::createMiscChunks() {
if (config->autoImport)
createRuntimePseudoRelocs();
- if (config->mingw)
+ if (config->mingw) {
insertCtorDtorSymbols();
+ insertBssDataStartEndSymbols();
+ }
}
// Create .idata section for the DLL-imported symbol table.
@@ -2369,6 +2373,31 @@ void Writer::insertCtorDtorSymbols() {
}
}
+// MinGW (really, Cygwin) specific. Cygwin's startup code uses __data_start__,
+// __data_end__, __bss_start__ and __bss_end__ to know what to copy during fork
+// emulation; bind each pair to the start and end of the section's chunk list.
+void Writer::insertBssDataStartEndSymbols() {
+ if (!dataSec->chunks.empty()) {
+ Symbol *dataStartSym = ctx.symtab.find("__data_start__");
+ Symbol *dataEndSym = ctx.symtab.find("__data_end__");
+ Chunk *endChunk = dataSec->chunks.back();
+ replaceSymbol<DefinedSynthetic>(dataStartSym, dataStartSym->getName(),
+ dataSec->chunks.front());
+ replaceSymbol<DefinedSynthetic>(dataEndSym, dataEndSym->getName(), endChunk,
+ endChunk->getSize());
+ }
+
+ if (!bssSec->chunks.empty()) {
+ Symbol *bssStartSym = ctx.symtab.find("__bss_start__");
+ Symbol *bssEndSym = ctx.symtab.find("__bss_end__");
+ Chunk *endChunk = bssSec->chunks.back();
+ replaceSymbol<DefinedSynthetic>(bssStartSym, bssStartSym->getName(),
+ bssSec->chunks.front());
+ replaceSymbol<DefinedSynthetic>(bssEndSym, bssEndSym->getName(), endChunk,
+ endChunk->getSize());
+ }
+}
+
// Handles /section options to allow users to overwrite
// section attributes.
void Writer::setSectionPermissions() {
diff --git a/lld/test/COFF/cygwin-symbols.s b/lld/test/COFF/cygwin-symbols.s
new file mode 100644
index 0000000000000..586790160220e
--- /dev/null
+++ b/lld/test/COFF/cygwin-symbols.s
@@ -0,0 +1,100 @@
+# REQUIRES: x86
+# RUN: split-file %s %t.dir && cd %t.dir
+
+# RUN: llvm-mc -triple=x86_64-windows-cygnus -filetype=obj -o data-no-bss.obj data-no-bss.s
+# RUN: lld-link -lldmingw -entry:main data-no-bss.obj -out:data-no-bss.exe
+# RUN: llvm-objdump -s data-no-bss.exe | FileCheck --check-prefix=DATANOBSS %s
+
+# RUN: llvm-mc -triple=x86_64-windows-cygnus -filetype=obj -o bss-no-data.obj bss-no-data.s
+# RUN: lld-link -lldmingw -entry:main bss-no-data.obj -out:bss-no-data.exe
+# RUN: llvm-objdump -s bss-no-data.exe | FileCheck --check-prefix=BSSNODATA %s
+
+# RUN: llvm-mc -triple=x86_64-windows-cygnus -filetype=obj -o data-and-bss.obj data-and-bss.s
+# RUN: lld-link -lldmingw -entry:main data-and-bss.obj -out:data-and-bss.exe
+# RUN: llvm-objdump -s data-and-bss.exe | FileCheck --check-prefix=DATAANDBSS %s
+
+#--- data-no-bss.s
+.globl main
+main:
+ nop
+
+.data
+ .quad 1
+ .byte 2
+
+.section .data_cygwin_nocopy, "w"
+ .align 4
+ .quad 3
+ .byte 4
+
+.section .test, "w"
+ .quad __data_start__
+ .quad __data_end__
+ .quad __bss_start__
+ .quad __bss_end__
+
+#--- bss-no-data.s
+.globl main
+main:
+ nop
+
+.bss
+ .quad 0
+ .byte 0
+
+.section .test, "w"
+ .quad __data_start__
+ .quad __data_end__
+ .quad __bss_start__
+ .quad __bss_end__
+
+#--- data-and-bss.s
+.globl main
+main:
+ nop
+
+.data
+ .quad 1
+ .byte 2
+
+.section .data_cygwin_nocopy, "w"
+ .align 4
+ .quad 3
+ .byte 4
+
+.bss
+ .quad 0
+ .byte 0
+
+.section .test, "w"
+ .quad __data_start__
+ .quad __data_end__
+ .quad __bss_start__
+ .quad __bss_end__
+
+# DATANOBSS: Contents of section .data:
+# DATANOBSS-NEXT: 140003000 01000000 00000000 02000000 03000000
+# DATANOBSS-NEXT: 140003010 00000000 04
+# __data_start__ pointing at 0x140003000 and __data_end__ at 0x140003009;
+# __bss_start__ and __bss_end__ both pointing at 0x14000300c (no .bss data).
+# DATANOBSS-NEXT: Contents of section .test:
+# DATANOBSS-NEXT: 140004000 00300040 01000000 09300040 01000000
+# DATANOBSS-NEXT: 140004010 0c300040 01000000 0c300040 01000000
+
+# __data_start__ and __data_end__ both pointing at 0x140003000 (no .data
+# contents); __bss_start__ at 0x140003000 and __bss_end__ at 0x140003009.
+# BSSNODATA: Contents of section .test:
+# BSSNODATA-NEXT: 140004000 00300040 01000000 00300040 01000000
+# BSSNODATA-NEXT: 140004010 00300040 01000000 09300040 01000000
+
+# DATAANDBSS: Contents of section .data:
+# DATAANDBSS-NEXT: 140003000 01000000 00000000 02000000 00000000
+# DATAANDBSS-NEXT: 140003010 00000000 00000000 03000000 00000000
+# DATAANDBSS-NEXT: 140003020 04
+# __data_start__ pointing at 0x140003000 and
+# __data_end__ pointing at 0x140003009.
+# __bss_start__ pointing at 0x14000300c and
+# __bss_end__ pointing at 0x140003015.
+# DATAANDBSS-NEXT: Contents of section .test:
+# DATAANDBSS-NEXT: 140004000 00300040 01000000 09300040 01000000
+# DATAANDBSS-NEXT: 140004010 0c300040 01000000 15300040 01000000
More information about the llvm-commits
mailing list