[lld] [LLD][COFF] Deduplicate common chunks when linking COFF files. (PR #162553)

Joshua Cranmer via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 9 07:23:52 PDT 2025


https://github.com/jcranmer-intel updated https://github.com/llvm/llvm-project/pull/162553

>From 64f6c57d0fd8ba023b7026bbf95d53317a430585 Mon Sep 17 00:00:00 2001
From: Joshua Cranmer <joshua.cranmer at intel.com>
Date: Mon, 6 Oct 2025 13:42:59 -0700
Subject: [PATCH 1/3] [LLD][COFF] Deduplicate common chunks when linking COFF
 files.

This fixes issue 162148.

Common symbols are intended to have only a single copy of their data
present in the final executable, and the MSVC linker successfully
deduplicates these chunks. If an application has a large number of
translation units that each declare a large block of common data (this
is possible, for example, with Fortran code), then failing to
deduplicate these chunks can make the data size so large that the
resulting executable fails to load.

The logic in this patch doesn't catch all of the potential cases for
deduplication, but it should catch the most common ones.
---
 lld/COFF/Chunks.cpp | 2 +-
 lld/COFF/Chunks.h   | 2 ++
 lld/COFF/Symbols.h  | 2 ++
 lld/COFF/Writer.cpp | 4 ++++
 4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index cb5cba5c414a1..a0b43537ff598 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -777,7 +777,7 @@ uint32_t SectionChunk::getSectionNumber() const {
   return s.getIndex() + 1;
 }
 
-CommonChunk::CommonChunk(const COFFSymbolRef s) : sym(s) {
+CommonChunk::CommonChunk(const COFFSymbolRef s) : active(false), sym(s) {
   // The value of a common symbol is its size. Align all common symbols smaller
   // than 32 bytes naturally, i.e. round the size up to the next power of two.
   // This is what MSVC link.exe does.
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 6d88f5ec73776..05f59aba43cea 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -522,6 +522,8 @@ class CommonChunk : public NonSectionChunk {
   uint32_t getOutputCharacteristics() const override;
   StringRef getSectionName() const override { return ".bss"; }
 
+  bool active;
+
 private:
   const COFFSymbolRef sym;
 };
diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h
index c86ded860876b..d41613ef6879f 100644
--- a/lld/COFF/Symbols.h
+++ b/lld/COFF/Symbols.h
@@ -236,6 +236,8 @@ class DefinedCommon : public DefinedCOFF {
                 CommonChunk *c = nullptr)
       : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) {
     this->isExternal = true;
+    if (c)
+      c->active = true;
   }
 
   static bool classof(const Symbol *s) {
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 258a82e371f3a..0ee24de4e6f8a 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1114,6 +1114,10 @@ void Writer::createSections() {
         sc->printDiscardedMessage();
       continue;
     }
+    if (auto *cc = dyn_cast<CommonChunk>(c)) {
+      if (!cc->active)
+        continue;
+    }
     StringRef name = c->getSectionName();
     if (shouldStripSectionSuffix(sc, name, ctx.config.mingw))
       name = name.split('$').first;

>From 543500a9630182e28a261f86019c75f1ea9598b8 Mon Sep 17 00:00:00 2001
From: Joshua Cranmer <joshua.cranmer at intel.com>
Date: Wed, 8 Oct 2025 15:41:42 -0700
Subject: [PATCH 2/3] Add a test for deduplication.

---
 lld/test/COFF/common-dedup.ll | 38 +++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 lld/test/COFF/common-dedup.ll

diff --git a/lld/test/COFF/common-dedup.ll b/lld/test/COFF/common-dedup.ll
new file mode 100644
index 0000000000000..9a4071ab08ff8
--- /dev/null
+++ b/lld/test/COFF/common-dedup.ll
@@ -0,0 +1,38 @@
+; REQUIRES: x86
+; RUN: split-file %s %t.dir
+; RUN: llc %t.dir/t1.ll -o %t.t1.obj --filetype=obj
+; RUN: llc %t.dir/t2.ll -o %t.t2.obj --filetype=obj
+; RUN: lld-link %t.t1.obj %t.t2.obj  -entry:main -out:%t.exe
+; RUN: llvm-readobj --section-headers %t.exe | FileCheck %s
+
+; .data must hold just one 4096-byte (0x1000) copy of @a, not two.
+; CHECK: Name: .data
+; CHECK-NEXT: VirtualSize: 0x1000
+
+;--- t1.ll
+target triple = "x86_64-pc-windows-msvc"
+@a = common global [4096 x i8] zeroinitializer
+
+define i32 @usea() {
+  %ref_common = load i32, ptr @a
+  ret i32 %ref_common
+}
+
+;--- t2.ll
+target triple = "x86_64-pc-windows-msvc"
+@a = common global [4096 x i8] zeroinitializer
+
+define i32 @useb() {
+  %ref_common = load i32, ptr @a
+  ret i32 %ref_common
+}
+
+declare i32 @usea()
+
+define dso_local i32 @main() local_unnamed_addr {
+entry:
+  %a = tail call i32 @usea()
+  %b = tail call i32 @useb()
+  %add = add nsw i32 %a, %b
+  ret i32 %add
+}

>From a328771245cfc4ac2a996ee8d0cd86d620ab80db Mon Sep 17 00:00:00 2001
From: Joshua Cranmer <joshua.cranmer at intel.com>
Date: Thu, 9 Oct 2025 07:23:23 -0700
Subject: [PATCH 3/3] s/active/live/g

---
 lld/COFF/Chunks.cpp | 2 +-
 lld/COFF/Chunks.h   | 2 +-
 lld/COFF/Symbols.h  | 2 +-
 lld/COFF/Writer.cpp | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index a0b43537ff598..548d87bdaefe5 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -777,7 +777,7 @@ uint32_t SectionChunk::getSectionNumber() const {
   return s.getIndex() + 1;
 }
 
-CommonChunk::CommonChunk(const COFFSymbolRef s) : active(false), sym(s) {
+CommonChunk::CommonChunk(const COFFSymbolRef s) : live(false), sym(s) {
   // The value of a common symbol is its size. Align all common symbols smaller
   // than 32 bytes naturally, i.e. round the size up to the next power of two.
   // This is what MSVC link.exe does.
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 05f59aba43cea..cf8857dc57305 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -522,7 +522,7 @@ class CommonChunk : public NonSectionChunk {
   uint32_t getOutputCharacteristics() const override;
   StringRef getSectionName() const override { return ".bss"; }
 
-  bool active;
+  bool live;
 
 private:
   const COFFSymbolRef sym;
diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h
index d41613ef6879f..9f077ddb2bb72 100644
--- a/lld/COFF/Symbols.h
+++ b/lld/COFF/Symbols.h
@@ -237,7 +237,7 @@ class DefinedCommon : public DefinedCOFF {
       : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) {
     this->isExternal = true;
     if (c)
-      c->active = true;
+      c->live = true;
   }
 
   static bool classof(const Symbol *s) {
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 0ee24de4e6f8a..3ac26681541ba 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1115,7 +1115,7 @@ void Writer::createSections() {
       continue;
     }
     if (auto *cc = dyn_cast<CommonChunk>(c)) {
-      if (!cc->active)
+      if (!cc->live)
         continue;
     }
     StringRef name = c->getSectionName();



More information about the llvm-commits mailing list