[lld] [LLD][COFF] add __{data,bss}_{start,end}__ symbols for Cygwin support (PR #136180)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 30 10:04:11 PDT 2025
https://github.com/jeremyd2019 updated https://github.com/llvm/llvm-project/pull/136180
>From fcd7a03ac178d1299a265153e1eea57ed33edfd7 Mon Sep 17 00:00:00 2001
From: Jeremy Drake <github at jdrake.com>
Date: Sun, 13 Apr 2025 22:41:05 -0700
Subject: [PATCH] [LLD][COFF] add __{data,bss}_{start,end}__ symbols for Cygwin
support
Cygwin requires these symbols for its fork emulation to know what data
to copy into the child. GNU ld defines these symbols for MinGW targets
also, so do the same here.
Cygwin also has the .data_cygwin_nocopy section, which is merged into
.data but placed outside the __data_start__ to __data_end__ range, so
that fork's copying skips it. AFAIK it is only used by the Cygwin DLL
itself (which needs a custom linker script to link, something LLD does
not support), but the section is included in GNU ld's default linker
script, so handle it here too.
Signed-off-by: Jeremy Drake <github at jdrake.com>
---
lld/COFF/Driver.cpp | 5 ++
lld/COFF/Writer.cpp | 33 +++++++++++-
lld/test/COFF/cygwin-symbols.s | 99 ++++++++++++++++++++++++++++++++++
3 files changed, 135 insertions(+), 2 deletions(-)
create mode 100644 lld/test/COFF/cygwin-symbols.s
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index e3ff647209e72..4c296da35d667 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2028,6 +2028,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
parseMerge(".ctors=.rdata");
parseMerge(".dtors=.rdata");
parseMerge(".CRT=.rdata");
+ parseMerge(".data_cygwin_nocopy=.data");
}
// Handle /section
@@ -2484,6 +2485,10 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (config->mingw) {
symtab.addAbsolute(symtab.mangle("__CTOR_LIST__"), 0);
symtab.addAbsolute(symtab.mangle("__DTOR_LIST__"), 0);
+ symtab.addAbsolute("__data_start__", 0);
+ symtab.addAbsolute("__data_end__", 0);
+ symtab.addAbsolute("__bss_start__", 0);
+ symtab.addAbsolute("__bss_end__", 0);
}
if (config->debug || config->buildIDHash != BuildIDHash::None)
if (symtab.findUnderscore("__buildid"))
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index cf0a92bd0bcb4..f3cf4902e6ecf 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -240,6 +240,7 @@ class Writer {
void createRuntimePseudoRelocs();
void createECChunks();
void insertCtorDtorSymbols();
+ void insertBssDataStartEndSymbols();
void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols);
void createGuardCFTables();
void markSymbolsForRVATable(ObjFile *file,
@@ -315,6 +316,7 @@ class Writer {
OutputSection *textSec;
OutputSection *hexpthkSec;
+ OutputSection *bssSec;
OutputSection *rdataSec;
OutputSection *buildidSec;
OutputSection *dataSec;
@@ -1077,7 +1079,7 @@ void Writer::createSections() {
textSec = createSection(".text", code | r | x);
if (isArm64EC(ctx.config.machine))
hexpthkSec = createSection(".hexpthk", code | r | x);
- createSection(".bss", bss | r | w);
+ bssSec = createSection(".bss", bss | r | w);
rdataSec = createSection(".rdata", data | r);
buildidSec = createSection(".buildid", data | r);
dataSec = createSection(".data", data | r | w);
@@ -1260,8 +1262,10 @@ void Writer::createMiscChunks() {
if (config->autoImport)
createRuntimePseudoRelocs();
- if (config->mingw)
+ if (config->mingw) {
insertCtorDtorSymbols();
+ insertBssDataStartEndSymbols();
+ }
}
// Create .idata section for the DLL-imported symbol table.
@@ -2381,6 +2385,31 @@ void Writer::insertCtorDtorSymbols() {
}
}
+// MinGW/Cygwin only: point __data_start__/__data_end__ and __bss_start__/
+// __bss_end__ at the first chunk and past the last chunk of .data and .bss;
+// Cygwin's startup code reads them to know what to copy when emulating fork.
+void Writer::insertBssDataStartEndSymbols() {
+ if (!dataSec->chunks.empty()) { // no chunks: both symbols are left untouched
+ Symbol *dataStartSym = ctx.symtab.find("__data_start__");
+ Symbol *dataEndSym = ctx.symtab.find("__data_end__");
+ Chunk *endChunk = dataSec->chunks.back();
+ replaceSymbol<DefinedSynthetic>(dataStartSym, dataStartSym->getName(),
+ dataSec->chunks.front());
+ replaceSymbol<DefinedSynthetic>(dataEndSym, dataEndSym->getName(), endChunk,
+ endChunk->getSize()); // lands just past endChunk
+ }
+
+ if (!bssSec->chunks.empty()) { // same treatment for .bss
+ Symbol *bssStartSym = ctx.symtab.find("__bss_start__");
+ Symbol *bssEndSym = ctx.symtab.find("__bss_end__");
+ Chunk *endChunk = bssSec->chunks.back();
+ replaceSymbol<DefinedSynthetic>(bssStartSym, bssStartSym->getName(),
+ bssSec->chunks.front());
+ replaceSymbol<DefinedSynthetic>(bssEndSym, bssEndSym->getName(), endChunk,
+ endChunk->getSize()); // lands just past endChunk
+ }
+}
+
// Handles /section options to allow users to overwrite
// section attributes.
void Writer::setSectionPermissions() {
diff --git a/lld/test/COFF/cygwin-symbols.s b/lld/test/COFF/cygwin-symbols.s
new file mode 100644
index 0000000000000..d1aec4278425c
--- /dev/null
+++ b/lld/test/COFF/cygwin-symbols.s
@@ -0,0 +1,99 @@
+# REQUIRES: x86
+# RUN: split-file %s %t.dir && cd %t.dir
+
+# RUN: llvm-mc -triple=x86_64-windows-cygnus -filetype=obj -o data-no-bss.obj data-no-bss.s
+# RUN: lld-link -lldmingw -entry:main data-no-bss.obj -out:data-no-bss.exe
+# RUN: llvm-objdump -s data-no-bss.exe | FileCheck --check-prefix=DATANOBSS %s
+
+# RUN: llvm-mc -triple=x86_64-windows-cygnus -filetype=obj -o bss-no-data.obj bss-no-data.s
+# RUN: lld-link -lldmingw -entry:main bss-no-data.obj -out:bss-no-data.exe
+# RUN: llvm-objdump -s bss-no-data.exe | FileCheck --check-prefix=BSSNODATA %s
+
+# RUN: llvm-mc -triple=x86_64-windows-cygnus -filetype=obj -o data-and-bss.obj data-and-bss.s
+# RUN: lld-link -lldmingw -entry:main data-and-bss.obj -out:data-and-bss.exe
+# RUN: llvm-objdump -s data-and-bss.exe | FileCheck --check-prefix=DATAANDBSS %s
+
+#--- data-no-bss.s
+.globl main
+main:
+ nop
+
+.data
+ .quad 1
+ .byte 2
+
+.section .data_cygwin_nocopy, "w"
+ .align 4
+ .quad 3
+ .byte 4
+
+.section .test, "w"
+ .quad __data_start__
+ .quad __data_end__
+ .quad __bss_start__
+ .quad __bss_end__
+
+#--- bss-no-data.s
+.globl main
+main:
+ nop
+
+.bss
+ .zero 8192
+
+.section .test, "w"
+ .quad __data_start__
+ .quad __data_end__
+ .quad __bss_start__
+ .quad __bss_end__
+
+#--- data-and-bss.s
+.globl main
+main:
+ nop
+
+.data
+ .quad 1
+ .byte 2
+
+.section .data_cygwin_nocopy, "w"
+ .align 4
+ .quad 3
+ .byte 4
+
+.bss
+ .zero 8192
+
+.section .test, "w"
+ .quad __data_start__
+ .quad __data_end__
+ .quad __bss_start__
+ .quad __bss_end__
+
+# DATANOBSS: Contents of section .data:
+# DATANOBSS-NEXT: 140003000 01000000 00000000 02000000 03000000
+# DATANOBSS-NEXT: 140003010 00000000 04
+# __data_start__ pointing at 0x140003000, __data_end__ pointing at 0x140003009,
+# and __bss_start__ and __bss_end__ both pointing at 0x140003018.
+# DATANOBSS-NEXT: Contents of section .test:
+# DATANOBSS-NEXT: 140004000 00300040 01000000 09300040 01000000
+# DATANOBSS-NEXT: 140004010 18300040 01000000 18300040 01000000
+
+# __data_start__, __data_end__ and __bss_start__ all pointing at 0x140003000,
+# and __bss_end__ pointing at 0x140005000.
+# BSSNODATA-NOT: Contents of section .data:
+# BSSNODATA: Contents of section .test:
+# BSSNODATA-NEXT: 140005000 00300040 01000000 00300040 01000000
+# BSSNODATA-NEXT: 140005010 00300040 01000000 00500040 01000000
+
+# DATAANDBSS: Contents of section .data:
+# DATAANDBSS-NEXT: 140003000 01000000 00000000 02000000 03000000
+# DATAANDBSS-NEXT: 140003010 00000000 04000000 00000000 00000000
+# __data_start__ pointing at 0x140003000 and
+# __data_end__ pointing at 0x140003009.
+# __bss_start__ pointing at 0x140003018 and
+# __bss_end__ pointing at 0x140005018.
+# DATAANDBSS: 1400031f0 00000000 00000000 00000000 00000000
+# DATAANDBSS-NEXT: Contents of section .test:
+# DATAANDBSS-NEXT: 140006000 00300040 01000000 09300040 01000000
+# DATAANDBSS-NEXT: 140006010 18300040 01000000 18500040 01000000
More information about the llvm-commits
mailing list