[lld] r240519 - COFF: Initial implementation of Identical COMDAT Folding.

Rui Ueyama ruiu at google.com
Tue Jun 23 21:36:53 PDT 2015


Author: ruiu
Date: Tue Jun 23 23:36:52 2015
New Revision: 240519

URL: http://llvm.org/viewvc/llvm-project?rev=240519&view=rev
Log:
COFF: Initial implementation of Identical COMDAT Folding.

Identical COMDAT Folding (ICF) is an optimization to reduce binary
size by merging COMDAT sections that contain the same metadata,
actual data and relocations. MSVC link.exe and many other linkers
have this feature. With this patch, LLD is on par with MSVC in terms
of produced binary size.

This technique is pretty effective. For example, LLD's size is
reduced from 64MB to 54MB by enabling this optimization.

The algorithm implemented in this patch is extremely inefficient.
It puts all COMDAT sections into a set to identify duplicates.
Times to self-link without/with ICF are 3.3 and 320 seconds,
respectively. So this option makes LLD roughly 100x slower.
But it's okay as I wanted to achieve correctness first.
LLD is still able to link itself with this optimization.
I'm going to make it more efficient in followup patches.

Note that this optimization is *not* entirely safe. C/C++ require
that different functions have different addresses. If your program
relies on that property, your program wouldn't work with ICF.
However, it's not going to be an issue on Windows because MSVC
link.exe turns ICF on by default. As long as your program works
with default settings (i.e. without passing /opt:noicf), your program
would work with LLD too.

Added:
    lld/trunk/test/COFF/Inputs/icf1.yaml
    lld/trunk/test/COFF/Inputs/icf2.yaml
    lld/trunk/test/COFF/Inputs/icf3.yaml
    lld/trunk/test/COFF/icf.test
Modified:
    lld/trunk/COFF/Chunks.cpp
    lld/trunk/COFF/Chunks.h
    lld/trunk/COFF/Config.h
    lld/trunk/COFF/Driver.cpp
    lld/trunk/COFF/InputFiles.cpp
    lld/trunk/COFF/Symbols.h
    lld/trunk/COFF/Writer.cpp
    lld/trunk/COFF/Writer.h

Modified: lld/trunk/COFF/Chunks.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Chunks.cpp?rev=240519&r1=240518&r2=240519&view=diff
==============================================================================
--- lld/trunk/COFF/Chunks.cpp (original)
+++ lld/trunk/COFF/Chunks.cpp Tue Jun 23 23:36:52 2015
@@ -10,6 +10,7 @@
 #include "Chunks.h"
 #include "InputFiles.h"
 #include "Writer.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/COFF.h"
@@ -27,7 +28,7 @@ namespace lld {
 namespace coff {
 
 SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H)
-    : File(F), Header(H) {
+    : File(F), Ptr(this), Header(H) {
   // Initialize SectionName.
   File->getCOFFObj()->getSectionName(Header, SectionName);
 
@@ -146,10 +147,16 @@ bool SectionChunk::isCOMDAT() const {
 }
 
 void SectionChunk::printDiscardedMessage() {
-  llvm::dbgs() << "Discarded " << Sym->getName() << "\n";
+  if (this == Ptr) {
+    // Removed by dead-stripping.
+    llvm::dbgs() << "Discarded " << Sym->getName() << "\n";
+  } else {
+    // Removed by ICF.
+    llvm::dbgs() << "Replaced " << Sym->getName() << "\n";
+  }
 }
 
-SectionRef SectionChunk::getSectionRef() {
+SectionRef SectionChunk::getSectionRef() const {
   DataRefImpl Ref;
   Ref.p = uintptr_t(Header);
   return SectionRef(Ref, File->getCOFFObj());
@@ -159,6 +166,70 @@ StringRef SectionChunk::getDebugName() {
   return Sym->getName();
 }
 
+uint64_t SectionChunk::getHash() const {
+  ArrayRef<uint8_t> A;
+  File->getCOFFObj()->getSectionContents(Header, A);
+  return hash_combine(getPermissions(), llvm::hash_value(SectionName),
+                      uint32_t(Header->SizeOfRawData),
+                      uint32_t(Header->NumberOfRelocations),
+                      hash_combine_range(A.data(), A.data() + A.size()));
+}
+
+// Returns true if this and a given chunk are identical COMDAT sections.
+bool SectionChunk::equals(const SectionChunk *X) const {
+  // Compare headers
+  if (getPermissions() != X->getPermissions())
+    return false;
+  if (SectionName != X->SectionName)
+    return false;
+  if (Header->SizeOfRawData != X->Header->SizeOfRawData)
+    return false;
+  if (Header->NumberOfRelocations != X->Header->NumberOfRelocations)
+    return false;
+
+  // Compare data
+  ArrayRef<uint8_t> A, B;
+  File->getCOFFObj()->getSectionContents(Header, A);
+  X->File->getCOFFObj()->getSectionContents(X->Header, B);
+  assert(A.size() == B.size());
+  if (memcmp(A.data(), B.data(), A.size()))
+    return false;
+
+  // Compare relocations
+  auto Range1 = getSectionRef().relocations();
+  auto Range2 = X->getSectionRef().relocations();
+  auto End = Range1.end();
+  for (auto I = Range1.begin(), J = Range2.begin(); I != End; ++I, ++J) {
+    const coff_relocation *Rel1 = File->getCOFFObj()->getCOFFRelocation(*I);
+    const coff_relocation *Rel2 = X->File->getCOFFObj()->getCOFFRelocation(*J);
+    if (Rel1->Type != Rel2->Type)
+      return false;
+    if (Rel1->VirtualAddress != Rel2->VirtualAddress)
+      return false;
+    SymbolBody *B1 = File->getSymbolBody(Rel1->SymbolTableIndex);
+    SymbolBody *B2 = X->File->getSymbolBody(Rel2->SymbolTableIndex);
+    if (auto *C1 = dyn_cast<DefinedCOMDAT>(B1))
+      if (auto *C2 = dyn_cast<DefinedCOMDAT>(B2))
+        if (C1->getChunk() == C2->getChunk())
+          continue;
+    if (B1 != B2)
+      return false;
+  }
+  return true;
+}
+
+// Returns a pointer to this chunk or its replacement.
+SectionChunk *SectionChunk::repl() {
+  while (Ptr != Ptr->Ptr)
+    Ptr = Ptr->Ptr;
+  return Ptr;
+}
+
+void SectionChunk::replaceWith(SectionChunk *Other) {
+  Ptr = Other;
+  Live = false;
+}
+
 CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) {
   // Common symbols are aligned on natural boundaries up to 32 bytes.
   // This is what MSVC link.exe does.

Modified: lld/trunk/COFF/Chunks.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Chunks.h?rev=240519&r1=240518&r2=240519&view=diff
==============================================================================
--- lld/trunk/COFF/Chunks.h (original)
+++ lld/trunk/COFF/Chunks.h Tue Jun 23 23:36:52 2015
@@ -140,14 +140,27 @@ public:
   StringRef getDebugName() override;
   void setSymbol(DefinedCOMDAT *S) { if (!Sym) Sym = S; }
 
+  uint64_t getHash() const;
+  bool equals(const SectionChunk *Other) const;
+
+  // Used for ICF (Identical COMDAT Folding)
+  SectionChunk *repl();
+  void replaceWith(SectionChunk *Other);
+
 private:
   void mark() override;
-  SectionRef getSectionRef();
+  SectionRef getSectionRef() const;
   void applyReloc(uint8_t *Buf, const coff_relocation *Rel);
 
   // A file this chunk was created from.
   ObjectFile *File;
 
+  // A pointer pointing to a replacement for this chunk.
+  // Initially it points to "this" object. If this chunk is merged
+  // with another chunk by ICF, it points to that chunk,
+  // and this chunk is considered dead.
+  SectionChunk *Ptr;
+
   const coff_section *Header;
   StringRef SectionName;
   std::vector<Chunk *> AssocChildren;

Modified: lld/trunk/COFF/Config.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Config.h?rev=240519&r1=240518&r2=240519&view=diff
==============================================================================
--- lld/trunk/COFF/Config.h (original)
+++ lld/trunk/COFF/Config.h Tue Jun 23 23:36:52 2015
@@ -62,6 +62,9 @@ struct Configuration {
   std::vector<Export> Exports;
   std::set<StringRef> DelayLoads;
 
+  // Used for /opt:icf
+  bool ICF = false;
+
   // Options for manifest files.
   ManifestKind Manifest = SideBySide;
   int ManifestID = 1;

Modified: lld/trunk/COFF/Driver.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Driver.cpp?rev=240519&r1=240518&r2=240519&view=diff
==============================================================================
--- lld/trunk/COFF/Driver.cpp (original)
+++ lld/trunk/COFF/Driver.cpp Tue Jun 23 23:36:52 2015
@@ -360,6 +360,10 @@ bool LinkerDriver::link(llvm::ArrayRef<c
       Config->DoGC = false;
       continue;
     }
+    if (S == "icf") {
+      Config->ICF = true;
+      continue;
+    }
     if (S != "ref" && S != "icf" && S != "noicf" &&
         S != "lbr" && S != "nolbr" &&
         !StringRef(S).startswith("icf=")) {

Modified: lld/trunk/COFF/InputFiles.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/InputFiles.cpp?rev=240519&r1=240518&r2=240519&view=diff
==============================================================================
--- lld/trunk/COFF/InputFiles.cpp (original)
+++ lld/trunk/COFF/InputFiles.cpp Tue Jun 23 23:36:52 2015
@@ -216,11 +216,10 @@ SymbolBody *ObjectFile::createSymbolBody
   if (Chunk *C = SparseChunks[Sym.getSectionNumber()]) {
     if (!C->isCOMDAT())
       return new (Alloc) DefinedRegular(COFFObj.get(), Sym, C);
-    auto *B = new (Alloc) DefinedCOMDAT(COFFObj.get(), Sym, C);
-    if (Sym.getValue() == 0 && !AuxP) {
-      auto *SC = reinterpret_cast<SectionChunk *>(C);
+    auto *SC = reinterpret_cast<SectionChunk *>(C);
+    auto *B = new (Alloc) DefinedCOMDAT(COFFObj.get(), Sym, SC);
+    if (Sym.getValue() == 0 && !AuxP)
       SC->setSymbol(B);
-    }
     return B;
   }
   return nullptr;

Modified: lld/trunk/COFF/Symbols.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Symbols.h?rev=240519&r1=240518&r2=240519&view=diff
==============================================================================
--- lld/trunk/COFF/Symbols.h (original)
+++ lld/trunk/COFF/Symbols.h Tue Jun 23 23:36:52 2015
@@ -140,25 +140,29 @@ private:
 
 class DefinedCOMDAT : public Defined {
 public:
-  DefinedCOMDAT(COFFObjectFile *F, COFFSymbolRef S, Chunk *C)
+  DefinedCOMDAT(COFFObjectFile *F, COFFSymbolRef S, SectionChunk *C)
       : Defined(DefinedCOMDATKind), COFFFile(F), Sym(S), Data(C) {}
 
   static bool classof(const SymbolBody *S) {
     return S->kind() == DefinedCOMDATKind;
   }
 
+  uint64_t getFileOff() override {
+    return Data->repl()->getFileOff() + Sym.getValue();
+  }
+
   StringRef getName() override;
-  uint64_t getRVA() override { return Data->getRVA() + Sym.getValue(); }
+  uint64_t getRVA() override { return Data->repl()->getRVA() + Sym.getValue(); }
   bool isExternal() override { return Sym.isExternal(); }
-  void markLive() override { Data->markLive(); }
-  uint64_t getFileOff() override { return Data->getFileOff() + Sym.getValue(); }
+  void markLive() override { Data->repl()->markLive(); }
   int compare(SymbolBody *Other) override;
+  Chunk *getChunk() { return Data->repl(); }
 
 private:
   StringRef Name;
   COFFObjectFile *COFFFile;
   COFFSymbolRef Sym;
-  Chunk *Data;
+  SectionChunk *Data;
 };
 
 class DefinedCommon : public Defined {

Modified: lld/trunk/COFF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Writer.cpp?rev=240519&r1=240518&r2=240519&view=diff
==============================================================================
--- lld/trunk/COFF/Writer.cpp (original)
+++ lld/trunk/COFF/Writer.cpp Tue Jun 23 23:36:52 2015
@@ -20,6 +20,7 @@
 #include <cstdio>
 #include <functional>
 #include <map>
+#include <unordered_set>
 #include <utility>
 
 using namespace llvm;
@@ -115,8 +116,47 @@ void Writer::markLive() {
       C->markLive();
 }
 
+struct Hasher {
+  size_t operator()(const SectionChunk *C) const { return C->getHash(); }
+};
+
+struct Equals {
+  bool operator()(const SectionChunk *A, const SectionChunk *B) const {
+    return A->equals(B);
+  }
+};
+
+// Merge identical COMDAT sections.
+// Two sections are considered as identical when their section headers,
+// contents and relocations are all the same.
+void Writer::dedupCOMDATs() {
+  std::vector<SectionChunk *> V;
+  for (Chunk *C : Symtab->getChunks())
+    if (C->isCOMDAT() && C->isLive())
+      V.push_back(reinterpret_cast<SectionChunk *>(C));
+
+  std::unordered_set<SectionChunk *, Hasher, Equals> Set;
+  bool removed = false;
+  for (SectionChunk *C : V) {
+    auto P = Set.insert(C);
+    if (P.second)
+      continue;
+    SectionChunk *Existing = *P.first;
+    C->replaceWith(Existing);
+    removed = true;
+  }
+  // By merging sections, two relocations that originally pointed to
+  // different locations can now point to the same location.
+  // So, repeat the process until convergence is reached.
+  if (removed)
+    dedupCOMDATs();
+}
+
 // Create output section objects and add them to OutputSections.
 void Writer::createSections() {
+  if (Config->ICF)
+    dedupCOMDATs();
+
   // First, bin chunks by name.
   std::map<StringRef, std::vector<Chunk *>> Map;
   for (Chunk *C : Symtab->getChunks()) {

Modified: lld/trunk/COFF/Writer.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Writer.h?rev=240519&r1=240518&r2=240519&view=diff
==============================================================================
--- lld/trunk/COFF/Writer.h (original)
+++ lld/trunk/COFF/Writer.h Tue Jun 23 23:36:52 2015
@@ -89,6 +89,7 @@ private:
 
   OutputSection *findSection(StringRef Name);
   OutputSection *createSection(StringRef Name);
+  void dedupCOMDATs();
   void addBaserels(OutputSection *Dest);
   void addBaserelBlocks(OutputSection *Dest, std::vector<uint32_t> &V);
 

Added: lld/trunk/test/COFF/Inputs/icf1.yaml
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/COFF/Inputs/icf1.yaml?rev=240519&view=auto
==============================================================================
--- lld/trunk/test/COFF/Inputs/icf1.yaml (added)
+++ lld/trunk/test/COFF/Inputs/icf1.yaml Tue Jun 23 23:36:52 2015
@@ -0,0 +1,43 @@
+---
+header:
+  Machine:         IMAGE_FILE_MACHINE_AMD64
+  Characteristics: []
+sections:
+  - Name:            .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       16
+    SectionData:     0000000000000000
+    Relocations:
+      - VirtualAddress:  0
+        SymbolName:      foo
+        Type:            IMAGE_REL_AMD64_REL32
+  - Name:            .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       16
+    SectionData:     00000000
+symbols:
+  - Name:            .text
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          8
+      NumberOfRelocations: 1
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+      Selection:       IMAGE_COMDAT_SELECT_ANY
+  - Name:            mainCRTStartup
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+  - Name:            foo
+    Value:           0
+    SectionNumber:   2
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL

Added: lld/trunk/test/COFF/Inputs/icf2.yaml
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/COFF/Inputs/icf2.yaml?rev=240519&view=auto
==============================================================================
--- lld/trunk/test/COFF/Inputs/icf2.yaml (added)
+++ lld/trunk/test/COFF/Inputs/icf2.yaml Tue Jun 23 23:36:52 2015
@@ -0,0 +1,40 @@
+---
+header:
+  Machine:         IMAGE_FILE_MACHINE_AMD64
+  Characteristics: []
+sections:
+  - Name:            .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       16
+    SectionData:     0000000000000000
+    Relocations:
+      - VirtualAddress:  0
+        SymbolName:      foo
+        Type:            IMAGE_REL_AMD64_REL32
+symbols:
+  - Name:            .text
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          8
+      NumberOfRelocations: 1
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+      Selection:       IMAGE_COMDAT_SELECT_ANY
+  # icf2 is identical with mainCRTStartup
+  - Name:            icf2
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+  - Name:            foo
+    Value:           0
+    SectionNumber:   0
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL

Added: lld/trunk/test/COFF/Inputs/icf3.yaml
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/COFF/Inputs/icf3.yaml?rev=240519&view=auto
==============================================================================
--- lld/trunk/test/COFF/Inputs/icf3.yaml (added)
+++ lld/trunk/test/COFF/Inputs/icf3.yaml Tue Jun 23 23:36:52 2015
@@ -0,0 +1,40 @@
+---
+header:
+  Machine:         IMAGE_FILE_MACHINE_AMD64
+  Characteristics: []
+sections:
+  - Name:            .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_LNK_COMDAT, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       16
+    SectionData:     0000000000000000
+    Relocations:
+      - VirtualAddress:  4
+        SymbolName:      foo
+        Type:            IMAGE_REL_AMD64_REL32
+symbols:
+  - Name:            .text
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          8
+      NumberOfRelocations: 1
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+      Selection:       IMAGE_COMDAT_SELECT_ANY
+  # icf3 is *not* identical with mainCRTStartup because its relocation is different
+  - Name:            icf3
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+  - Name:            foo
+    Value:           0
+    SectionNumber:   0
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL

Added: lld/trunk/test/COFF/icf.test
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/COFF/icf.test?rev=240519&view=auto
==============================================================================
--- lld/trunk/test/COFF/icf.test (added)
+++ lld/trunk/test/COFF/icf.test Tue Jun 23 23:36:52 2015
@@ -0,0 +1,11 @@
+# RUN: yaml2obj < %p/Inputs/icf1.yaml > %t1.obj
+# RUN: yaml2obj < %p/Inputs/icf2.yaml > %t2.obj
+# RUN: yaml2obj < %p/Inputs/icf3.yaml > %t3.obj
+#
+# RUN: lld -flavor link2 /out:%t.exe %t1.obj %t2.obj %t3.obj \
+# RUN:   /opt:icf /include:icf2 /include:icf3 /verbose >& %t.log
+# RUN: FileCheck %s < %t.log
+
+CHECK-NOT: Replaced mainCRTStartup
+CHECK:     Replaced icf2
+CHECK-NOT: Replaced icf3





More information about the llvm-commits mailing list