[lld] 64da33a - ELF: Introduce --randomize-section-padding option.

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 13 11:52:13 PST 2024


Author: Peter Collingbourne
Date: 2024-12-13T11:52:09-08:00
New Revision: 64da33a58923e60a5c7854c1a13e14f16d01b1f0

URL: https://github.com/llvm/llvm-project/commit/64da33a58923e60a5c7854c1a13e14f16d01b1f0
DIFF: https://github.com/llvm/llvm-project/commit/64da33a58923e60a5c7854c1a13e14f16d01b1f0.diff

LOG: ELF: Introduce --randomize-section-padding option.

The --randomize-section-padding option randomly inserts padding between
input sections using the given seed. It is intended to be used in A/B
experiments to determine the average effect of a change on program
performance, while controlling for effects such as false sharing in
the cache which may introduce measurement bias. For more details,
see the RFC:

https://discourse.llvm.org/t/rfc-lld-feature-for-controlling-for-code-size-dependent-measurement-bias/83334

Reviewers: smithp35, MaskRay

Reviewed By: MaskRay, smithp35

Pull Request: https://github.com/llvm/llvm-project/pull/117653

Added: 
    lld/test/ELF/randomize-section-padding.test

Modified: 
    lld/ELF/Config.h
    lld/ELF/Driver.cpp
    lld/ELF/Options.td
    lld/ELF/OutputSections.h
    lld/ELF/SyntheticSections.cpp
    lld/ELF/SyntheticSections.h
    lld/ELF/Writer.cpp
    lld/docs/ld.lld.1

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index a2836733c2715e..5b6b332cd597df 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -320,6 +320,7 @@ struct Config {
   bool printGcSections;
   bool printIcfSections;
   bool printMemoryUsage;
+  std::optional<uint64_t> randomizeSectionPadding;
   bool rejectMismatch;
   bool relax;
   bool relaxGP;

diff  --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 3c553e5043180b..9240f29d98d614 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1410,6 +1410,9 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
   ctx.arg.searchPaths = args::getStrings(args, OPT_library_path);
   ctx.arg.sectionStartMap = getSectionStartMap(ctx, args);
   ctx.arg.shared = args.hasArg(OPT_shared);
+  if (args.hasArg(OPT_randomize_section_padding))
+    ctx.arg.randomizeSectionPadding =
+        args::getInteger(args, OPT_randomize_section_padding, 0);
   ctx.arg.singleRoRx = !args.hasFlag(OPT_rosegment, OPT_no_rosegment, true);
   ctx.arg.soName = args.getLastArgValue(OPT_soname);
   ctx.arg.sortSection = getSortSection(ctx, args);

diff  --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index ebe77204264210..c31875305952fb 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -434,6 +434,9 @@ defm section_start: Eq<"section-start", "Set address of section">,
 
 def shared: F<"shared">, HelpText<"Build a shared object">;
 
+def randomize_section_padding: JJ<"randomize-section-padding=">,
+  HelpText<"Randomly insert padding between input sections and at the start of each segment using given seed">;
+
 defm soname: Eq<"soname", "Set DT_SONAME">;
 
 defm sort_section:

diff  --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
index 67191392d1dbe7..3ab36a21ce488d 100644
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -124,14 +124,14 @@ class OutputSection final : public SectionBase {
   void sortInitFini();
   void sortCtorsDtors();
 
+  std::array<uint8_t, 4> getFiller(Ctx &);
+
   // Used for implementation of --compress-debug-sections and
   // --compress-sections.
   CompressedData compressed;
 
 private:
   SmallVector<InputSection *, 0> storage;
-
-  std::array<uint8_t, 4> getFiller(Ctx &);
 };
 
 struct OutputDesc final : SectionCommand {

diff  --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 6c5f2a614639c8..c8a05e4b9c3cc6 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2753,6 +2753,21 @@ RelroPaddingSection::RelroPaddingSection(Ctx &ctx)
     : SyntheticSection(ctx, ".relro_padding", SHT_NOBITS, SHF_ALLOC | SHF_WRITE,
                        1) {}
 
+RandomizePaddingSection::RandomizePaddingSection(Ctx &ctx, uint64_t size,
+                                                 OutputSection *parent)
+    : SyntheticSection(ctx, ".randomize_padding", SHT_PROGBITS, SHF_ALLOC, 1),
+      size(size) {
+  this->parent = parent;
+}
+
+void RandomizePaddingSection::writeTo(uint8_t *buf) {
+  std::array<uint8_t, 4> filler = getParent()->getFiller(ctx);
+  uint8_t *end = buf + size;
+  for (; buf + 4 <= end; buf += 4)
+    memcpy(buf, &filler[0], 4);
+  memcpy(buf, &filler[0], end - buf);
+}
+
 // The string hash function for .gdb_index.
 static uint32_t computeGdbHash(StringRef s) {
   uint32_t h = 0;

diff  --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 4b643e86335510..132513cbd3b796 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -796,6 +796,15 @@ class RelroPaddingSection final : public SyntheticSection {
   void writeTo(uint8_t *buf) override {}
 };
 
+class RandomizePaddingSection final : public SyntheticSection {
+  uint64_t size;
+
+public:
+  RandomizePaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent);
+  size_t getSize() const override { return size; }
+  void writeTo(uint8_t *buf) override;
+};
+
 // Used by the merged DWARF32 .debug_names (a per-module index). If we
 // move to DWARF64, most of this data will need to be re-sized.
 class DebugNamesBaseSection : public SyntheticSection {

diff  --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 6c16549bfa6c04..d5581ca3e1c921 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1449,6 +1449,40 @@ static void finalizeSynthetic(Ctx &ctx, SyntheticSection *sec) {
   }
 }
 
+static bool canInsertPadding(OutputSection *sec) {
+  StringRef s = sec->name;
+  return s == ".bss" || s == ".data" || s == ".data.rel.ro" || s == ".lbss" ||
+         s == ".ldata" || s == ".lrodata" || s == ".ltext" || s == ".rodata" ||
+         s.starts_with(".text");
+}
+
+static void randomizeSectionPadding(Ctx &ctx) {
+  std::mt19937 g(*ctx.arg.randomizeSectionPadding);
+  PhdrEntry *curPtLoad = nullptr;
+  for (OutputSection *os : ctx.outputSections) {
+    if (!canInsertPadding(os))
+      continue;
+    for (SectionCommand *bc : os->commands) {
+      if (auto *isd = dyn_cast<InputSectionDescription>(bc)) {
+        SmallVector<InputSection *, 0> tmp;
+        if (os->ptLoad != curPtLoad) {
+          tmp.push_back(make<RandomizePaddingSection>(
+              ctx, g() % ctx.arg.maxPageSize, os));
+          curPtLoad = os->ptLoad;
+        }
+        for (InputSection *isec : isd->sections) {
+          // Probability of inserting padding is 1 in 16.
+          if (g() % 16 == 0)
+            tmp.push_back(
+                make<RandomizePaddingSection>(ctx, isec->addralign, os));
+          tmp.push_back(isec);
+        }
+        isd->sections = std::move(tmp);
+      }
+    }
+  }
+}
+
 // We need to generate and finalize the content that depends on the address of
 // InputSections. As the generation of the content may also alter InputSection
 // addresses we must converge to a fixed point. We do that here. See the comment
@@ -1475,6 +1509,9 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
   if (ctx.arg.emachine == EM_HEXAGON)
     hexagonTLSSymbolUpdate(ctx);
 
+  if (ctx.arg.randomizeSectionPadding)
+    randomizeSectionPadding(ctx);
+
   uint32_t pass = 0, assignPasses = 0;
   for (;;) {
     bool changed = ctx.target->needsThunks

diff  --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index b22cb362837715..2fa6f64b2d2032 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -529,6 +529,19 @@ and
 .It Fl -pop-state
 Restore the states saved by
 .Fl -push-state.
+.It Fl -randomize-section-padding Ns = Ns Ar seed
+Randomly insert padding between input sections and at the start of each segment using the given seed.
+Padding is inserted into output sections with names matching the following patterns:
+.Cm .bss ,
+.Cm .data ,
+.Cm .data.rel.ro ,
+.Cm .lbss ,
+.Cm .ldata ,
+.Cm .lrodata ,
+.Cm .ltext ,
+.Cm .rodata
+and
+.Cm .text* .
 .It Fl --relax-gp
 Enable global pointer relaxation for RISC-V.
 .It Fl -relocatable , Fl r

diff  --git a/lld/test/ELF/randomize-section-padding.test b/lld/test/ELF/randomize-section-padding.test
new file mode 100644
index 00000000000000..af8e4f14981cd1
--- /dev/null
+++ b/lld/test/ELF/randomize-section-padding.test
@@ -0,0 +1,125 @@
+# REQUIRES: x86
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a.s -o %t/a.o
+
+## --randomize-section-padding= inserts segment offset padding and pre-section
+## padding, and does not affect flags. Segment offset padding is only inserted
+## when PT_LOAD changes, as shown by .bss size (.data and .bss share a PT_LOAD).
+
+# RUN: ld.lld --randomize-section-padding=6 %t/a.o -o %t/a.out
+# RUN: llvm-readelf -sS -x .rodata -x .text -x .data %t/a.out | FileCheck --check-prefix=PAD6 %s
+
+# PAD6:      .rodata           PROGBITS        0000000000200158 000158 000b8d 00   A  0   0  1
+# PAD6-NEXT: .text             PROGBITS        0000000000201ce8 000ce8 000270 00  AX  0   0  4
+# PAD6-NEXT: .data             PROGBITS        0000000000202f58 000f58 000941 00  WA  0   0  1
+# PAD6-NEXT: .bss              NOBITS          0000000000203899 001899 000003 00  WA  0   0  1
+
+# PAD6: 0000000000203899     0 NOTYPE  LOCAL  DEFAULT     4 a
+# PAD6: 000000000020389a     0 NOTYPE  LOCAL  DEFAULT     4 b
+# PAD6: 000000000020389b     0 NOTYPE  LOCAL  DEFAULT     4 c
+
+# PAD6: Hex dump of section '.rodata':
+# PAD6: 0x00200cd8 00000000 00000000 00000102 03
+# PAD6: Hex dump of section '.text':
+# PAD6: 0x00201f48 cccccccc cccccccc cccccccc 0405cc06
+# PAD6: Hex dump of section '.data':
+# PAD6: 0x00203888 00000000 00000000 00000000 00000708
+# PAD6: 0x00203898 09
+
+## Size of segment offset padding and location of pre-section padding is
+## dependent on the seed.
+
+# RUN: ld.lld --randomize-section-padding=46 %t/a.o -o %t/a.out
+# RUN: llvm-readelf -sS -x .rodata -x .text -x .data %t/a.out | FileCheck --check-prefix=PAD46 %s
+
+# PAD46:      .rodata           PROGBITS        0000000000200158 000158 000cc0 00   A  0   0  1
+# PAD46-NEXT: .text             PROGBITS        0000000000201e18 000e18 0009bf 00  AX  0   0  4
+# PAD46-NEXT: .data             PROGBITS        00000000002037d7 0017d7 000540 00  WA  0   0  1
+# PAD46-NEXT: .bss              NOBITS          0000000000203d17 001d17 000004 00  WA  0   0  1
+
+# PAD46: 0000000000203d17     0 NOTYPE  LOCAL  DEFAULT     4 a
+# PAD46: 0000000000203d18     0 NOTYPE  LOCAL  DEFAULT     4 b
+# PAD46: 0000000000203d1a     0 NOTYPE  LOCAL  DEFAULT     4 c
+
+# PAD46: Hex dump of section '.rodata':
+# PAD46: 0x00200e08 00000000 00000000 00000000 00010203
+# PAD46: Hex dump of section '.text':
+# PAD46: 0x002027c8 cccccccc cccccccc cccccccc 040506
+# PAD46: Hex dump of section '.data':
+# PAD46: 0x00203d07 00000000 00000000 00000000 07000809
+
+## When there are multiple InputSectionDescriptions for an output section,
+## segment offset padding is inserted in the first InputSectionDescription.
+
+# RUN: ld.lld --randomize-section-padding=46 %t/a.o %t/a.lds -o %t/a.out
+
+# RUN: llvm-readelf -sS -x .rodata -x .text -x .data %t/a.out | FileCheck --check-prefix=PAD46-LDS %s
+
+# PAD46-LDS:      .rodata           PROGBITS        0000000000000158 000158 000cc0 00   A  0   0  1
+# PAD46-LDS-NEXT: .text             PROGBITS        0000000000001000 001000 0009c0 00  AX  0   0  4
+# PAD46-LDS-NEXT: .data             PROGBITS        0000000000002000 002000 000540 00  WA  0   0  1
+# PAD46-LDS-NEXT: .bss              NOBITS          0000000000002540 002540 000004 00  WA  0   0  1
+
+# PAD46-LDS: 0000000000002543     0 NOTYPE  LOCAL  DEFAULT     4 a
+# PAD46-LDS: 0000000000002541     0 NOTYPE  LOCAL  DEFAULT     4 b
+# PAD46-LDS: 0000000000002540     0 NOTYPE  LOCAL  DEFAULT     4 c
+
+# PAD46-LDS: Hex dump of section '.rodata':
+# PAD46-LDS: 0x00000e08 00000000 00000000 00000000 00030201 ................
+# PAD46-LDS: Hex dump of section '.text':
+# PAD46-LDS: 0x000019b0 cccccccc cccccccc cccc0605 04cccccc ................
+# PAD46-LDS: Hex dump of section '.data':
+# PAD46-LDS: 0x00002530 00000000 00000000 00000000 09000807 ................
+
+#--- a.s
+
+.section .rodata.a,"a",@progbits
+.byte 1
+
+.section .rodata.b,"a",@progbits
+.byte 2
+
+.section .rodata.c,"a",@progbits
+.byte 3
+
+.section .text.a,"ax",@progbits
+.byte 4
+
+.section .text.b,"ax",@progbits
+.byte 5
+
+.section .text.c,"ax",@progbits
+.byte 6
+
+.section .data.a,"aw",@progbits
+.byte 7
+
+.section .data.b,"aw",@progbits
+.byte 8
+
+.section .data.c,"aw",@progbits
+.byte 9
+
+.section .bss.a,"a",@nobits
+a:
+.zero 1
+
+.section .bss.b,"a",@nobits
+b:
+.zero 1
+
+.section .bss.c,"a",@nobits
+c:
+.zero 1
+
+#--- a.lds
+
+SECTIONS {
+  . = SIZEOF_HEADERS;
+  .rodata : { *(.rodata.c) *(.rodata.b) *(.rodata.a) }
+  . = ALIGN(CONSTANT(MAXPAGESIZE));
+  .text : { *(.text.c) *(.text.b) *(.text.a) }
+  . = ALIGN(CONSTANT(MAXPAGESIZE));
+  .data : { *(.data.c) *(.data.b) *(.data.a) }
+  .bss : { *(.bss.c) *(.bss.b) *(.bss.a) }
+}


        


More information about the llvm-commits mailing list