[lld] dbd0ad3 - [LLD][ELF] Add support for INPUT_SECTION_FLAGS

Tue Jan 21 02:15:04 PST 2020

Author: Peter Smith
Date: 2020-01-21T10:05:26Z
New Revision: dbd0ad33668ea7b6d5c2c5db1fa290cc08ab99d9

URL: https://github.com/llvm/llvm-project/commit/dbd0ad33668ea7b6d5c2c5db1fa290cc08ab99d9
DIFF: https://github.com/llvm/llvm-project/commit/dbd0ad33668ea7b6d5c2c5db1fa290cc08ab99d9.diff

LOG: [LLD][ELF] Add support for INPUT_SECTION_FLAGS

The INPUT_SECTION_FLAGS linker script command is used to constrain the
section pattern matching to sections that match certain combinations of
flags.

There are two ways to express the constraint.
withFlags: Section must have these flags.
withoutFlags: Section must not have these flags.

The syntax of the command is:
INPUT_SECTION_FLAGS '(' sect_flag_list ')'
sect_flag_list: NAME
| sect_flag_list '&' NAME

Where NAME matches a section flag name such as SHF_EXECINSTR, or the
integer value of a section flag. If the first character of NAME is '!',
the section must not have that flag.

We do not support the rare case of { INPUT_SECTION_FLAGS(flags) filespec }
where filespec has no input section description like (.text).

As an example from the ld man page:
SECTIONS {
  .text : { INPUT_SECTION_FLAGS (SHF_MERGE & SHF_STRINGS) *(.text) }
  .text2 :  { INPUT_SECTION_FLAGS (!SHF_WRITE) *(.text) }
}
.text will match sections called .text that have both the SHF_MERGE and
SHF_STRINGS flag.
.text2 will match sections called .text that don't have the SHF_WRITE flag.

The accepted flag names are those generic to all targets, plus
SHF_ARM_PURECODE, which is very useful for filtering all the pure code
sections into a single program header that can be marked execute never.

fixes PR44265

Differential Revision: https://reviews.llvm.org/D72756

Added: 
    lld/test/ELF/input-section-flags-diag1.test
    lld/test/ELF/input-section-flags-diag2.test
    lld/test/ELF/input-section-flags-diag3.test
    lld/test/ELF/input-section-flags-keep.s
    lld/test/ELF/input-section-flags.s

Modified: 
    lld/ELF/LinkerScript.cpp
    lld/ELF/LinkerScript.h
    lld/ELF/ScriptParser.cpp

Removed: 
    


################################################################################
diff  --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index b0d60bc32a9f..aaa7744a7809 100644

--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -335,7 +335,9 @@ bool LinkerScript::shouldKeep(InputSectionBase *s) {
   for (InputSectionDescription *id : keptSections)
     if (id->filePat.match(filename))
       for (SectionPattern &p : id->sectionPatterns)
-        if (p.sectionPat.match(s->name))
+        if (p.sectionPat.match(s->name) &&
+            (s->flags & id->withFlags) == id->withFlags &&
+            (s->flags & id->withoutFlags) == 0)
           return true;
   return false;
 }
@@ -431,7 +433,10 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd) {
         continue;
 
       std::string filename = getFilename(sec->file);
-      if (!cmd->filePat.match(filename) || pat.excludedFilePat.match(filename))
+      if (!cmd->filePat.match(filename) ||
+          pat.excludedFilePat.match(filename) ||
+          (sec->flags & cmd->withFlags) != cmd->withFlags ||
+          (sec->flags & cmd->withoutFlags) != 0)
         continue;
 
       ret.push_back(sec);

diff  --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 25a14e08dade..d57301cf3524 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -155,8 +155,10 @@ struct SectionPattern {
 };
 
 struct InputSectionDescription : BaseCommand {
-  InputSectionDescription(StringRef filePattern)
-      : BaseCommand(InputSectionKind), filePat(filePattern) {}
+  InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
+                          uint64_t withoutFlags = 0)
+      : BaseCommand(InputSectionKind), filePat(filePattern),
+        withFlags(withFlags), withoutFlags(withoutFlags) {}
 
   static bool classof(const BaseCommand *c) {
     return c->kind == InputSectionKind;
@@ -180,6 +182,10 @@ struct InputSectionDescription : BaseCommand {
   // they were created in. This is used to insert newly created ThunkSections
   // into Sections at the end of a createThunks() pass.
   std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections;
+
+  // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
+  uint64_t withFlags;
+  uint64_t withoutFlags;
 };
 
 // Represents BYTE(), SHORT(), LONG(), or QUAD().

diff  --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index fd8de3b54bd7..f62a0d133afd 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include <cassert>
 #include <limits>
 #include <vector>
@@ -91,10 +92,13 @@ class ScriptParser final : ScriptLexer {
   OutputSection *readOutputSectionDescription(StringRef outSec);
   std::vector<BaseCommand *> readOverlay();
   std::vector<StringRef> readOutputSectionPhdrs();
+  std::pair<uint64_t, uint64_t> readInputSectionFlags();
   InputSectionDescription *readInputSectionDescription(StringRef tok);
   StringMatcher readFilePatterns();
   std::vector<SectionPattern> readInputSectionsList();
-  InputSectionDescription *readInputSectionRules(StringRef filePattern);
+  InputSectionDescription *readInputSectionRules(StringRef filePattern,
+                                                 uint64_t withFlags,
+                                                 uint64_t withoutFlags);
   unsigned readPhdrType();
   SortSectionPolicy readSortKind();
   SymbolAssignment *readProvideHidden(bool provide, bool hidden);
@@ -657,8 +661,10 @@ std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
 //
 // <section-list> is parsed by readInputSectionsList().
 InputSectionDescription *
-ScriptParser::readInputSectionRules(StringRef filePattern) {
-  auto *cmd = make<InputSectionDescription>(filePattern);
+ScriptParser::readInputSectionRules(StringRef filePattern, uint64_t withFlags,
+                                    uint64_t withoutFlags) {
+  auto *cmd =
+      make<InputSectionDescription>(filePattern, withFlags, withoutFlags);
   expect("(");
 
   while (!errorCount() && !consume(")")) {
@@ -694,15 +700,23 @@ InputSectionDescription *
 ScriptParser::readInputSectionDescription(StringRef tok) {
   // Input section wildcard can be surrounded by KEEP.
   // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep
+  uint64_t withFlags = 0;
+  uint64_t withoutFlags = 0;
   if (tok == "KEEP") {
     expect("(");
-    StringRef filePattern = next();
-    InputSectionDescription *cmd = readInputSectionRules(filePattern);
+    if (consume("INPUT_SECTION_FLAGS"))
+      std::tie(withFlags, withoutFlags) = readInputSectionFlags();
+    InputSectionDescription *cmd =
+        readInputSectionRules(next(), withFlags, withoutFlags);
     expect(")");
     script->keptSections.push_back(cmd);
     return cmd;
   }
-  return readInputSectionRules(tok);
+  if (tok == "INPUT_SECTION_FLAGS") {
+    std::tie(withFlags, withoutFlags) = readInputSectionFlags();
+    tok = next();
+  }
+  return readInputSectionRules(tok, withFlags, withoutFlags);
 }
 
 void ScriptParser::readSort() {
@@ -781,9 +795,14 @@ OutputSection *ScriptParser::readOverlaySectionDescription() {
       script->createOutputSection(next(), getCurrentLocation());
   cmd->inOverlay = true;
   expect("{");
-  while (!errorCount() && !consume("}"))
-    cmd->sectionCommands.push_back(readInputSectionRules(next()));
-  cmd->phdrs = readOutputSectionPhdrs();
+  while (!errorCount() && !consume("}")) {
+    uint64_t withFlags = 0;
+    uint64_t withoutFlags = 0;
+    if (consume("INPUT_SECTION_FLAGS"))
+      std::tie(withFlags, withoutFlags) = readInputSectionFlags();
+    cmd->sectionCommands.push_back(
+        readInputSectionRules(next(), withFlags, withoutFlags));
+  }
   return cmd;
 }
 
@@ -841,6 +860,9 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) {
       // We have a file name and no input sections description. It is not a
       // commonly used syntax, but still acceptable. In that case, all sections
       // from the file will be included.
+      // FIXME: GNU ld permits INPUT_SECTION_FLAGS to be used here. We do not
+      // handle this case here as it will already have been matched by the
+      // case above.
       auto *isd = make<InputSectionDescription>(tok);
       isd->sectionPatterns.push_back({{}, StringMatcher({"*"})});
       cmd->sectionCommands.push_back(isd);
@@ -1102,6 +1124,63 @@ ByteCommand *ScriptParser::readByteCommand(StringRef tok) {
   return make<ByteCommand>(e, size, commandString);
 }
 
+static llvm::Optional<uint64_t> parseFlag(StringRef tok) {
+  if (llvm::Optional<uint64_t> asInt = parseInt(tok))
+    return asInt;
+#define CASE_ENT(enum) #enum, ELF::enum
+  return StringSwitch<llvm::Optional<uint64_t>>(tok)
+      .Case(CASE_ENT(SHF_WRITE))
+      .Case(CASE_ENT(SHF_ALLOC))
+      .Case(CASE_ENT(SHF_EXECINSTR))
+      .Case(CASE_ENT(SHF_MERGE))
+      .Case(CASE_ENT(SHF_STRINGS))
+      .Case(CASE_ENT(SHF_INFO_LINK))
+      .Case(CASE_ENT(SHF_LINK_ORDER))
+      .Case(CASE_ENT(SHF_OS_NONCONFORMING))
+      .Case(CASE_ENT(SHF_GROUP))
+      .Case(CASE_ENT(SHF_TLS))
+      .Case(CASE_ENT(SHF_COMPRESSED))
+      .Case(CASE_ENT(SHF_EXCLUDE))
+      .Case(CASE_ENT(SHF_ARM_PURECODE))
+      .Default(None);
+#undef CASE_ENT
+}
+
+// Reads the '(' <flags> ')' list of section flags in
+// INPUT_SECTION_FLAGS '(' <flags> ')' in the
+// following form:
+// <flags> ::= <flag>
+//           | <flags> & <flag>
+// <flag>  ::= Recognized Flag Name, or Integer value of flag.
+// If the first character of <flag> is a ! then this means without flag,
+// otherwise with flag.
+// Example: SHF_EXECINSTR & !SHF_WRITE means with flag SHF_EXECINSTR and
+// without flag SHF_WRITE.
+std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() {
+   uint64_t withFlags = 0;
+   uint64_t withoutFlags = 0;
+   expect("(");
+   while (!errorCount()) {
+    StringRef tok = unquote(next());
+    bool without = tok.consume_front("!");
+    if (llvm::Optional<uint64_t> flag = parseFlag(tok)) {
+      if (without)
+        withoutFlags |= *flag;
+      else
+        withFlags |= *flag;
+    } else {
+      setError("unrecognised flag: " + tok);
+    }
+    if (consume(")"))
+      break;
+    if (!consume("&")) {
+      next();
+      setError("expected & or )");
+    }
+  }
+  return std::make_pair(withFlags, withoutFlags);
+}
+
 StringRef ScriptParser::readParenLiteral() {
   expect("(");
   bool orig = inExpr;

diff  --git a/lld/test/ELF/input-section-flags-diag1.test b/lld/test/ELF/input-section-flags-diag1.test
new file mode 100644
index 000000000000..329f17c86ec7
--- /dev/null
+++ b/lld/test/ELF/input-section-flags-diag1.test
@@ -0,0 +1,13 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t.o
+# RUN: not ld.lld -shared %t.o -o /dev/null --script %s 2>&1 | FileCheck -strict-whitespace %s
+
+## Check that the section flag is recognized.
+
+SECTIONS {
+ .text : { INPUT_SECTION_FLAGS(UNKNOWN_FLAG) *(.text) }
+}
+
+# CHECK: unrecognised flag: UNKNOWN_FLAG
+# CHECK-NEXT: >>>  .text : { INPUT_SECTION_FLAGS(UNKNOWN_FLAG) *(.text) }
+# CHECK-NEXT: >>>                                ^

diff  --git a/lld/test/ELF/input-section-flags-diag2.test b/lld/test/ELF/input-section-flags-diag2.test
new file mode 100644
index 000000000000..a83ab23d7040
--- /dev/null
+++ b/lld/test/ELF/input-section-flags-diag2.test
@@ -0,0 +1,13 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t.o
+# RUN: not ld.lld -shared %t.o -o /dev/null --script %s 2>&1 | FileCheck -strict-whitespace %s
+
+## Check that we start with a flag
+
+SECTIONS {
+ .text : { INPUT_SECTION_FLAGS(& SHF_ALLOC) *(.text) }
+}
+
+# CHECK:  unrecognised flag: &
+# CHECK-NEXT: >>>  .text : { INPUT_SECTION_FLAGS(& SHF_ALLOC) *(.text) }
+# CHECK-NEXT: >>>                                ^

diff  --git a/lld/test/ELF/input-section-flags-diag3.test b/lld/test/ELF/input-section-flags-diag3.test
new file mode 100644
index 000000000000..2d16030fc10f
--- /dev/null
+++ b/lld/test/ELF/input-section-flags-diag3.test
@@ -0,0 +1,13 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o %t.o
+# RUN: not ld.lld -shared %t.o -o /dev/null --script %s 2>&1 | FileCheck -strict-whitespace %s
+
+## Check that flags are separated by &
+
+SECTIONS {
+ .text : { INPUT_SECTION_FLAGS(SHF_ALLOC SHF_EXECINSTR) *(.text) }
+}
+
+// CHECK: expected & or )
+// CHECK-NEXT: >>>  .text : { INPUT_SECTION_FLAGS(SHF_ALLOC SHF_EXECINSTR) *(.text) }
+// CHECK-NEXT: >>>                                          ^

diff  --git a/lld/test/ELF/input-section-flags-keep.s b/lld/test/ELF/input-section-flags-keep.s
new file mode 100644
index 000000000000..51891892940d
--- /dev/null
+++ b/lld/test/ELF/input-section-flags-keep.s
@@ -0,0 +1,27 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: echo "SECTIONS { \
+# RUN:  . = SIZEOF_HEADERS; \
+# RUN:  .keep : { KEEP( INPUT_SECTION_FLAGS(!SHF_WRITE) *(.sec*)) } \
+# RUN:  }" > %t.script
+# RUN: ld.lld --gc-sections -o %t --script %t.script %t.o
+# RUN: llvm-readobj --symbols %t | FileCheck %s
+
+## Check that INPUT_SECTION_FLAGS works within KEEP: the read-only .sec1 is
+## kept, while the writable .sec2 is still garbage collected.
+# CHECK: Name: keep
+# CHECK-NOT: Name: collect
+.text
+.global _start
+_start:
+ .long 0
+
+.section .sec1, "a"
+.global keep
+keep:
+ .long 1
+
+.section .sec2, "aw"
+.global collect
+collect:
+ .long 2

diff  --git a/lld/test/ELF/input-section-flags.s b/lld/test/ELF/input-section-flags.s
new file mode 100644
index 000000000000..0c8e31c77b0d
--- /dev/null
+++ b/lld/test/ELF/input-section-flags.s
@@ -0,0 +1,115 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+
+## Test the INPUT_SECTION_FLAGS feature. It prefixes an input section list and
+## restricts matches to sections that have all of the required flags and none
+## of the excluded flags.
+
+## Uniquely identify each .sec section by flag alone, with .text going into
+## .outsec2, which requires SHF_EXECINSTR.
+# RUN: echo "SECTIONS { \
+# RUN: .outsec1 : { INPUT_SECTION_FLAGS(SHF_ALLOC & !SHF_EXECINSTR & \
+# RUN:                                  !SHF_WRITE & !SHF_MERGE) *(.sec.*) } \
+# RUN: .outsec2 : { INPUT_SECTION_FLAGS(SHF_ALLOC & SHF_EXECINSTR & !SHF_WRITE\
+# RUN:                                  & !SHF_MERGE) *(.sec.* .text) } \
+# RUN: .outsec3 : { INPUT_SECTION_FLAGS(SHF_WRITE) *(.sec.*) } \
+# RUN: .outsec4 : { INPUT_SECTION_FLAGS(SHF_MERGE & !SHF_STRINGS) *(.sec.*) } \
+# RUN: .outsec5 : { INPUT_SECTION_FLAGS(SHF_STRINGS) *(.sec.*) } \
+# RUN: } " > %t.script
+# RUN: ld.lld -o %t1 --script %t.script %t.o
+# RUN: llvm-readobj --symbols %t1 | FileCheck %s
+# CHECK:  Name: _start
+# CHECK:  Section: .outsec2
+# CHECK:  Name: s1
+# CHECK:  Section: .outsec1
+# CHECK:  Name: s2
+# CHECK:  Section: .outsec2
+# CHECK:  Name: s3
+# CHECK:  Section: .outsec3
+# CHECK:  Name: s4
+# CHECK:  Section: .outsec4
+# CHECK:  Name: s5
+# CHECK:  Section: .outsec5
+
+## Same test but using OVERLAY.
+# RUN: echo "SECTIONS { \
+# RUN: OVERLAY 0x1000 : AT ( 0x4000 ) { \
+# RUN: .outsec1 { INPUT_SECTION_FLAGS(SHF_ALLOC & !SHF_EXECINSTR & \
+# RUN:                                !SHF_WRITE & !SHF_MERGE) *(.sec.*) }\
+# RUN: .outsec2 { INPUT_SECTION_FLAGS(SHF_ALLOC & SHF_EXECINSTR & !SHF_WRITE \
+# RUN:                                & !SHF_MERGE) *(.sec.* .text) } \
+# RUN: .outsec3 { INPUT_SECTION_FLAGS(SHF_WRITE) *(.sec.*) } \
+# RUN: .outsec4 { INPUT_SECTION_FLAGS(SHF_MERGE & !SHF_STRINGS) *(.sec.*) } \
+# RUN: .outsec5 { INPUT_SECTION_FLAGS(SHF_STRINGS) *(.sec.*) } \
+# RUN: } } " > %t2.script
+
+# RUN: ld.lld -o %t2 --script %t2.script %t.o
+# RUN: llvm-readobj --symbols %t2 | FileCheck %s
+
+## Same test but using hex representations of the flags.
+# RUN: echo "SECTIONS { \
+# RUN: .outsec1 : { INPUT_SECTION_FLAGS(0x2 & !0x4 & !0x1 & !0x10) *(.sec.*) }\
+# RUN: .outsec2 : { INPUT_SECTION_FLAGS(0x2 & 0x4 & !0x1 & !0x10) \
+# RUN:              *(.sec.* .text) } \
+# RUN: .outsec3 : { INPUT_SECTION_FLAGS(0x1) *(.sec.*) } \
+# RUN: .outsec4 : { INPUT_SECTION_FLAGS(0x10 & !0x20) *(.sec.*) } \
+# RUN: .outsec5 : { INPUT_SECTION_FLAGS(0x20) *(.sec.*) } \
+# RUN: } " > %t3.script
+
+# RUN: ld.lld -o %t3 --script %t3.script %t.o
+# RUN: llvm-readobj --symbols %t3 | FileCheck %s
+
+## Check that we can handle multiple InputSectionDescriptions in a single
+## OutputSection
+# RUN: echo "SECTIONS { \
+# RUN: .outsec1 : { INPUT_SECTION_FLAGS(SHF_ALLOC & !SHF_EXECINSTR & \
+# RUN:                                  !SHF_WRITE & !SHF_MERGE) *(.sec.*) ; \
+# RUN:              INPUT_SECTION_FLAGS(SHF_ALLOC & SHF_EXECINSTR & !SHF_WRITE\
+# RUN:                                  & !SHF_MERGE)  *(.sec.* *.text) }\
+# RUN: } " > %t4.script
+
+# RUN: ld.lld -o %t4 --script %t4.script %t.o
+# RUN: llvm-readobj --symbols %t4 | FileCheck --check-prefix MULTIPLE %s
+
+# MULTIPLE:  Name: _start
+# MULTIPLE:  Section: .outsec1
+# MULTIPLE:  Name: s1
+# MULTIPLE:  Section: .outsec1
+# MULTIPLE:  Name: s2
+# MULTIPLE:  Section: .outsec1
+# MULTIPLE:  Name: s3
+# MULTIPLE:  Section: .sec.aw
+# MULTIPLE:  Name: s4
+# MULTIPLE:  Section: .sec.aM
+# MULTIPLE:  Name: s5
+# MULTIPLE:  Section: .sec.aMS
+
+ .text
+ .global _start
+_start:
+ nop
+
+ .section .sec.a, "a", @progbits
+ .globl s1
+s1:
+ .long 1
+
+ .section .sec.ax, "ax", @progbits
+ .globl s2
+s2:
+ .long 2
+
+ .section .sec.aw, "aw", @progbits
+ .globl s3
+s3:
+ .long 3
+
+ .section .sec.aM, "aM", @progbits, 4
+ .globl s4
+s4:
+ .long 4
+
+ .section .sec.aMS, "aMS", @progbits, 1
+ .globl s5
+s5:
+ .asciz "a"