[lld] r337429 - ELF: Implement --icf=safe using address-significance tables.

Peter Collingbourne via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 18 15:49:31 PDT 2018


Author: pcc
Date: Wed Jul 18 15:49:31 2018
New Revision: 337429

URL: http://llvm.org/viewvc/llvm-project?rev=337429&view=rev
Log:
ELF: Implement --icf=safe using address-significance tables.

Differential Revision: https://reviews.llvm.org/D48146

Added:
    lld/trunk/test/ELF/Inputs/icf-safe.s
    lld/trunk/test/ELF/icf-safe.s
Modified:
    lld/trunk/ELF/Config.h
    lld/trunk/ELF/Driver.cpp
    lld/trunk/ELF/ICF.cpp
    lld/trunk/ELF/InputFiles.cpp
    lld/trunk/ELF/InputFiles.h
    lld/trunk/ELF/Options.td
    lld/trunk/docs/ld.lld.1

Modified: lld/trunk/ELF/Config.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Config.h?rev=337429&r1=337428&r2=337429&view=diff
==============================================================================
--- lld/trunk/ELF/Config.h (original)
+++ lld/trunk/ELF/Config.h Wed Jul 18 15:49:31 2018
@@ -40,6 +40,9 @@ enum class BuildIdKind { None, Fast, Md5
 // For --discard-{all,locals,none}.
 enum class DiscardPolicy { Default, All, Locals, None };
 
+// For --icf={none,safe,all}.
+enum class ICFLevel { None, Safe, All };
+
 // For --strip-{all,debug}.
 enum class StripPolicy { None, All, Debug };
 
@@ -138,7 +141,6 @@ struct Configuration {
   bool GnuUnique;
   bool HasDynamicList = false;
   bool HasDynSymTab;
-  bool ICF;
   bool IgnoreDataAddressEquality;
   bool IgnoreFunctionAddressEquality;
   bool LTODebugPassManager;
@@ -187,6 +189,7 @@ struct Configuration {
   bool ZRetpolineplt;
   bool ZWxneeded;
   DiscardPolicy Discard;
+  ICFLevel ICF;
   OrphanHandlingPolicy OrphanHandling;
   SortSectionPolicy SortSection;
   StripPolicy Strip;

Modified: lld/trunk/ELF/Driver.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Driver.cpp?rev=337429&r1=337428&r2=337429&view=diff
==============================================================================
--- lld/trunk/ELF/Driver.cpp (original)
+++ lld/trunk/ELF/Driver.cpp Wed Jul 18 15:49:31 2018
@@ -51,6 +51,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compression.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TarWriter.h"
 #include "llvm/Support/TargetSelect.h"
@@ -296,7 +297,7 @@ static void checkOptions(opt::InputArgLi
       error("-r and --gc-sections may not be used together");
     if (Config->GdbIndex)
       error("-r and --gdb-index may not be used together");
-    if (Config->ICF)
+    if (Config->ICF != ICFLevel::None)
       error("-r and --icf may not be used together");
     if (Config->Pie)
       error("-r and -pie may not be used together");
@@ -519,6 +520,15 @@ static StringRef getDynamicLinker(opt::I
   return Arg->getValue();
 }
 
+static ICFLevel getICF(opt::InputArgList &Args) {
+  auto *Arg = Args.getLastArg(OPT_icf_none, OPT_icf_safe, OPT_icf_all);
+  if (!Arg || Arg->getOption().getID() == OPT_icf_none)
+    return ICFLevel::None;
+  if (Arg->getOption().getID() == OPT_icf_safe)
+    return ICFLevel::Safe;
+  return ICFLevel::All;
+}
+
 static StripPolicy getStrip(opt::InputArgList &Args) {
   if (Args.hasArg(OPT_relocatable))
     return StripPolicy::None;
@@ -745,7 +755,7 @@ void LinkerDriver::readConfigs(opt::Inpu
   Config->GcSections = Args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
   Config->GnuUnique = Args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
   Config->GdbIndex = Args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);
-  Config->ICF = Args.hasFlag(OPT_icf_all, OPT_icf_none, false);
+  Config->ICF = getICF(Args);
   Config->IgnoreDataAddressEquality =
       Args.hasArg(OPT_ignore_data_address_equality);
   Config->IgnoreFunctionAddressEquality =
@@ -1225,16 +1235,60 @@ template <class ELFT> static void demote
   }
 }
 
+static bool keepUnique(Symbol *S) {
+  if (auto *D = dyn_cast_or_null<Defined>(S)) {
+    if (D->Section) {
+      D->Section->KeepUnique = true;
+      return true;
+    }
+  }
+  return false;
+}
+
 // Record sections that define symbols mentioned in --keep-unique <symbol>
-// these sections are inelligible for ICF.
+// and symbols referred to by address-significance tables. These sections are
+// ineligible for ICF.
+template <class ELFT>
 static void findKeepUniqueSections(opt::InputArgList &Args) {
   for (auto *Arg : Args.filtered(OPT_keep_unique)) {
     StringRef Name = Arg->getValue();
-    if (auto *Sym = dyn_cast_or_null<Defined>(Symtab->find(Name)))
-      Sym->Section->KeepUnique = true;
-    else
+    if (!keepUnique(Symtab->find(Name)))
       warn("could not find symbol " + Name + " to keep unique");
   }
+
+  if (Config->ICF == ICFLevel::Safe) {
+    // Symbols in the dynsym could be address-significant in other executables
+    // or DSOs, so we conservatively mark them as address-significant.
+    for (Symbol *S : Symtab->getSymbols())
+      if (S->includeInDynsym())
+        keepUnique(S);
+
+    // Visit the address-significance table in each object file and mark each
+    // referenced symbol as address-significant.
+    for (InputFile *F : ObjectFiles) {
+      auto *Obj = cast<ObjFile<ELFT>>(F);
+      ArrayRef<Symbol *> Syms = Obj->getSymbols();
+      if (Obj->AddrsigSec) {
+        ArrayRef<uint8_t> Contents =
+            check(Obj->getObj().getSectionContents(Obj->AddrsigSec));
+        const uint8_t *Cur = Contents.begin();
+        while (Cur != Contents.end()) {
+          unsigned Size;
+          const char *Err;
+          uint64_t SymIndex = decodeULEB128(Cur, &Size, Contents.end(), &Err);
+          if (Err)
+            fatal(toString(F) + ": could not decode addrsig section: " + Err);
+          keepUnique(Syms[SymIndex]);
+          Cur += Size;
+        }
+      } else {
+        // If an object file does not have an address-significance table,
+        // conservatively mark all of its symbols as address-significant.
+        for (Symbol *S : Syms)
+          keepUnique(S);
+      }
+    }
+  }
 }
 
 // Do actual linking. Note that when this function is called,
@@ -1409,8 +1463,8 @@ template <class ELFT> void LinkerDriver:
   markLive<ELFT>();
   demoteSymbols<ELFT>();
   mergeSections();
-  if (Config->ICF) {
-    findKeepUniqueSections(Args);
+  if (Config->ICF != ICFLevel::None) {
+    findKeepUniqueSections<ELFT>(Args);
     doIcf<ELFT>();
   }
 

Modified: lld/trunk/ELF/ICF.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/ICF.cpp?rev=337429&r1=337428&r2=337429&view=diff
==============================================================================
--- lld/trunk/ELF/ICF.cpp (original)
+++ lld/trunk/ELF/ICF.cpp Wed Jul 18 15:49:31 2018
@@ -173,8 +173,9 @@ static bool isEligible(InputSection *S)
     return false;
 
   // Don't merge read only data sections unless
-  // --ignore-data-address-equality was passed.
-  if (!(S->Flags & SHF_EXECINSTR) && !Config->IgnoreDataAddressEquality)
+  // --ignore-data-address-equality or --icf=safe was passed.
+  if (!(S->Flags & SHF_EXECINSTR) &&
+      !(Config->IgnoreDataAddressEquality || Config->ICF == ICFLevel::Safe))
     return false;
 
   // Don't merge synthetic sections as their Data member is not valid and empty.

Modified: lld/trunk/ELF/InputFiles.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputFiles.cpp?rev=337429&r1=337428&r2=337429&view=diff
==============================================================================
--- lld/trunk/ELF/InputFiles.cpp (original)
+++ lld/trunk/ELF/InputFiles.cpp Wed Jul 18 15:49:31 2018
@@ -420,6 +420,17 @@ void ObjFile<ELFT>::initializeSections(
     // if -r is given, we'll let the final link discard such sections.
     // This is compatible with GNU.
     if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) {
+      if (Sec.sh_type == SHT_LLVM_ADDRSIG) {
+        // We ignore the address-significance table if we know that the object
+        // file was created by objcopy or ld -r. This is because these tools
+        // will reorder the symbols in the symbol table, invalidating the data
+        // in the address-significance table, which refers to symbols by index.
+        if (Sec.sh_link != 0)
+          this->AddrsigSec = &Sec;
+        else if (Config->ICF == ICFLevel::Safe)
+          warn(toString(this) + ": --icf=safe is incompatible with object "
+                                "files created using objcopy or ld -r");
+      }
       this->Sections[I] = &InputSection::Discarded;
       continue;
     }

Modified: lld/trunk/ELF/InputFiles.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputFiles.h?rev=337429&r1=337428&r2=337429&view=diff
==============================================================================
--- lld/trunk/ELF/InputFiles.h (original)
+++ lld/trunk/ELF/InputFiles.h Wed Jul 18 15:49:31 2018
@@ -215,6 +215,9 @@ public:
   // but had one or more functions with the no_split_stack attribute.
   bool SomeNoSplitStack = false;
 
+  // Pointer to this input file's .llvm_addrsig section, if it has one.
+  const Elf_Shdr *AddrsigSec = nullptr;
+
 private:
   void
   initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups);

Modified: lld/trunk/ELF/Options.td
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Options.td?rev=337429&r1=337428&r2=337429&view=diff
==============================================================================
--- lld/trunk/ELF/Options.td (original)
+++ lld/trunk/ELF/Options.td Wed Jul 18 15:49:31 2018
@@ -170,6 +170,8 @@ def help: F<"help">, HelpText<"Print opt
 
 def icf_all: F<"icf=all">, HelpText<"Enable identical code folding">;
 
+def icf_safe: F<"icf=safe">, HelpText<"Enable safe identical code folding">;
+
 def icf_none: F<"icf=none">, HelpText<"Disable identical code folding (default)">;
 
 def ignore_function_address_equality: F<"ignore-function-address-equality">,

Modified: lld/trunk/docs/ld.lld.1
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/docs/ld.lld.1?rev=337429&r1=337428&r2=337429&view=diff
==============================================================================
--- lld/trunk/docs/ld.lld.1 (original)
+++ lld/trunk/docs/ld.lld.1 Wed Jul 18 15:49:31 2018
@@ -187,6 +187,8 @@ is the default.
 Print a help message.
 .It Fl -icf Ns = Ns Cm all
 Enable identical code folding.
+.It Fl -icf Ns = Ns Cm safe
+Enable safe identical code folding.
 .It Fl -icf Ns = Ns Cm none
 Disable identical code folding.
 .It Fl -image-base Ns = Ns Ar value

Added: lld/trunk/test/ELF/Inputs/icf-safe.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/Inputs/icf-safe.s?rev=337429&view=auto
==============================================================================
--- lld/trunk/test/ELF/Inputs/icf-safe.s (added)
+++ lld/trunk/test/ELF/Inputs/icf-safe.s Wed Jul 18 15:49:31 2018
@@ -0,0 +1,9 @@
+.section .text.non_addrsig1,"ax", at progbits
+.globl non_addrsig1
+non_addrsig1:
+ret
+
+.section .text.non_addrsig2,"ax", at progbits
+.globl non_addrsig2
+non_addrsig2:
+ret

Added: lld/trunk/test/ELF/icf-safe.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/icf-safe.s?rev=337429&view=auto
==============================================================================
--- lld/trunk/test/ELF/icf-safe.s (added)
+++ lld/trunk/test/ELF/icf-safe.s Wed Jul 18 15:49:31 2018
@@ -0,0 +1,137 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o
+# RUN: llvm-objcopy %t1.o %t1copy.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/icf-safe.s -o %t2.o
+# RUN: ld.lld %t1.o %t2.o -o %t2 --icf=safe --print-icf-sections | FileCheck %s
+# RUN: ld.lld %t1.o %t2.o -o %t3 --icf=safe --print-icf-sections -shared | FileCheck --check-prefix=EXPORT %s
+# RUN: ld.lld %t1.o %t2.o -o %t3 --icf=safe --print-icf-sections --export-dynamic | FileCheck --check-prefix=EXPORT %s
+# RUN: ld.lld %t1copy.o -o %t4 --icf=safe 2>&1 | FileCheck --check-prefix=OBJCOPY %s
+
+# CHECK-NOT: selected section {{.*}}:(.rodata.l1)
+# CHECK: selected section {{.*}}:(.rodata.l3)
+# CHECK:   removing identical section {{.*}}:(.rodata.l4)
+
+# CHECK-NOT: selected section {{.*}}:(.text.f1)
+# CHECK: selected section {{.*}}:(.text.f3)
+# CHECK:   removing identical section {{.*}}:(.text.f4)
+
+# CHECK-NOT: selected section {{.*}}:(.rodata.h1)
+# CHECK: selected section {{.*}}:(.rodata.h3)
+# CHECK:   removing identical section {{.*}}:(.rodata.h4)
+
+# CHECK-NOT: selected section {{.*}}:(.rodata.g1)
+# CHECK: selected section {{.*}}:(.rodata.g3)
+# CHECK:   removing identical section {{.*}}:(.rodata.g4)
+
+# CHECK-NOT: selected section {{.*}}:(.text.non_addrsig{{.}})
+
+# llvm-mc normally emits an empty .text section into every object file. Since
+# nothing actually refers to it via a relocation, it doesn't have any associated
+# symbols (thus nor can anything refer to it via a relocation, making it safe to
+# merge with the empty section in the other input file). Here we check that the
+# only two sections merged are the two empty sections and the sections with only
+# STB_LOCAL or STV_HIDDEN symbols. The dynsym entries should have prevented
+# anything else from being merged.
+# EXPORT-NOT: selected section
+# EXPORT: selected section {{.*}}:(.rodata.l3)
+# EXPORT:   removing identical section {{.*}}:(.rodata.l4)
+# EXPORT-NOT: selected section
+# EXPORT: selected section {{.*}}:(.rodata.h3)
+# EXPORT:   removing identical section {{.*}}:(.rodata.h4)
+# EXPORT-NOT: selected section
+# EXPORT: selected section {{.*}}:(.text)
+# EXPORT:   removing identical section {{.*}}:(.text)
+# EXPORT-NOT: selected section
+
+# OBJCOPY: --icf=safe is incompatible with object files created using objcopy or ld -r
+
+.section .text.f1,"ax", at progbits
+.globl f1
+f1:
+ret
+
+.section .text.f2,"ax", at progbits
+.globl f2
+f2:
+ret
+
+.section .text.f3,"ax", at progbits
+.globl f3
+f3:
+ud2
+
+.section .text.f4,"ax", at progbits
+.globl f4
+f4:
+ud2
+
+.section .rodata.g1,"a", at progbits
+.globl g1
+g1:
+.byte 1
+
+.section .rodata.g2,"a", at progbits
+.globl g2
+g2:
+.byte 1
+
+.section .rodata.g3,"a", at progbits
+.globl g3
+g3:
+.byte 2
+
+.section .rodata.g4,"a", at progbits
+.globl g4
+g4:
+.byte 2
+
+.section .rodata.l1,"a", at progbits
+l1:
+.byte 3
+
+.section .rodata.l2,"a", at progbits
+l2:
+.byte 3
+
+.section .rodata.l3,"a", at progbits
+l3:
+.byte 4
+
+.section .rodata.l4,"a", at progbits
+l4:
+.byte 4
+
+.section .rodata.h1,"a", at progbits
+.globl h1
+.hidden h1
+h1:
+.byte 5
+
+.section .rodata.h2,"a", at progbits
+.globl h2
+.hidden h2
+h2:
+.byte 5
+
+.section .rodata.h3,"a", at progbits
+.globl h3
+.hidden h3
+h3:
+.byte 6
+
+.section .rodata.h4,"a", at progbits
+.globl h4
+.hidden h4
+h4:
+.byte 6
+
+.addrsig
+.addrsig_sym f1
+.addrsig_sym f2
+.addrsig_sym g1
+.addrsig_sym g2
+.addrsig_sym l1
+.addrsig_sym l2
+.addrsig_sym h1
+.addrsig_sym h2




More information about the llvm-commits mailing list