[lld] 2a9aed0 - [ELF] Support multiple SORT in an input section description

Thu Nov 12 08:47:00 PST 2020

Author: Fangrui Song
Date: 2020-11-12T08:46:53-08:00
New Revision: 2a9aed0e8b538955f38f036bd34ea79adfce2ad7

URL: https://github.com/llvm/llvm-project/commit/2a9aed0e8b538955f38f036bd34ea79adfce2ad7
DIFF: https://github.com/llvm/llvm-project/commit/2a9aed0e8b538955f38f036bd34ea79adfce2ad7.diff

LOG: [ELF] Support multiple SORT in an input section description

The second `SORT` in `*(SORT(...) SORT(...))` is incorrectly parsed as a file pattern.
Fix the bug by stopping at `SORT*` in `readInputSectionsList`.

Reviewed By: grimar

Differential Revision: https://reviews.llvm.org/D91180

Added: 
    

Modified: 
    lld/ELF/ScriptParser.cpp
    lld/test/ELF/linkerscript/sort2.s
    lld/test/ELF/linkerscript/wildcards.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 5a8131166b14..c68d7f4f62e1 100644

--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -102,6 +102,7 @@ class ScriptParser final : ScriptLexer {
                                                  uint64_t withFlags,
                                                  uint64_t withoutFlags);
   unsigned readPhdrType();
+  SortSectionPolicy peekSortKind();
   SortSectionPolicy readSortKind();
   SymbolAssignment *readProvideHidden(bool provide, bool hidden);
   SymbolAssignment *readAssignment(StringRef tok);
@@ -618,16 +619,20 @@ StringMatcher ScriptParser::readFilePatterns() {
   return Matcher;
 }
 
+SortSectionPolicy ScriptParser::peekSortKind() {
+  return StringSwitch<SortSectionPolicy>(peek())
+      .Cases("SORT", "SORT_BY_NAME", SortSectionPolicy::Name)
+      .Case("SORT_BY_ALIGNMENT", SortSectionPolicy::Alignment)
+      .Case("SORT_BY_INIT_PRIORITY", SortSectionPolicy::Priority)
+      .Case("SORT_NONE", SortSectionPolicy::None)
+      .Default(SortSectionPolicy::Default);
+}
+
 SortSectionPolicy ScriptParser::readSortKind() {
-  if (consume("SORT") || consume("SORT_BY_NAME"))
-    return SortSectionPolicy::Name;
-  if (consume("SORT_BY_ALIGNMENT"))
-    return SortSectionPolicy::Alignment;
-  if (consume("SORT_BY_INIT_PRIORITY"))
-    return SortSectionPolicy::Priority;
-  if (consume("SORT_NONE"))
-    return SortSectionPolicy::None;
-  return SortSectionPolicy::Default;
+  SortSectionPolicy ret = peekSortKind();
+  if (ret != SortSectionPolicy::Default)
+    skip();
+  return ret;
 }
 
 // Reads SECTIONS command contents in the following form:
@@ -653,11 +658,15 @@ std::vector<SectionPattern> ScriptParser::readInputSectionsList() {
     }
 
     StringMatcher SectionMatcher;
-    while (!errorCount() && peek() != ")" && peek() != "EXCLUDE_FILE")
+    // Break if the next token is ), EXCLUDE_FILE, or SORT*.
+    while (!errorCount() && peek() != ")" && peek() != "EXCLUDE_FILE" &&
+           peekSortKind() == SortSectionPolicy::Default)
       SectionMatcher.addPattern(unquote(next()));
 
     if (!SectionMatcher.empty())
       ret.push_back({std::move(excludeFilePat), std::move(SectionMatcher)});
+    else if (excludeFilePat.empty())
+      break;
     else
       setError("section pattern is expected");
   }

diff  --git a/lld/test/ELF/linkerscript/sort2.s b/lld/test/ELF/linkerscript/sort2.s
index 1d42093d5aa0..732f96d39433 100644
--- a/lld/test/ELF/linkerscript/sort2.s
+++ b/lld/test/ELF/linkerscript/sort2.s
@@ -1,39 +1,36 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %tfile1.o
 
-# RUN: echo "SECTIONS { .abc : { *(SORT(.foo.*) .bar.*) } }" > %t1.script
+# RUN: echo "SECTIONS { .abc : { *(SORT(.foo.*) .a* .a* SORT(.bar.*) .b*) } }" > %t1.script
 # RUN: ld.lld -o %t1 --script %t1.script %tfile1.o
-# RUN: llvm-objdump -s %t1 | FileCheck %s
+# RUN: llvm-readelf -x .abc %t1 | FileCheck %s
 
-# CHECK:  Contents of section .abc:
-# CHECK:   01000000 00000000 02000000 00000000
-# CHECK:   03000000 00000000 04000000 00000000
-# CHECK:   06000000 00000000 05000000 00000000
+## FIXME Some input sections are duplicated in .abc and their second occurrences are zeros.
+# CHECK:      Hex dump of section '.abc'
+# CHECK-NEXT: 0x00000000 01020306 05040000 00070908 0b0c0a
 
 # RUN: echo "SECTIONS { \
-# RUN:   .abc : { *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .bar.*) } \
+# RUN:   .abc : { *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .a* SORT(.bar.*) .b*) } \
 # RUN:  }" > %t2.script
 # RUN: ld.lld -o %t2 --script %t2.script %tfile1.o
-# RUN: llvm-objdump -s %t2 | FileCheck %s
+# RUN: llvm-readelf -x .abc %t2 | FileCheck %s
 
 .text
 .globl _start
 _start:
 
-.section .foo.2,"a"
- .quad 2
+.section .foo.2,"a"; .byte 2
+.section .foo.3,"a"; .byte 3
+.section .foo.1,"a"; .byte 1
 
-.section .foo.3,"a"
- .quad 3
+.section .a6,"a"; .byte 6
+.section .a5,"a"; .byte 5
+.section .a4,"a"; .byte 4
 
-.section .foo.1,"a"
- .quad 1
+.section .bar.7,"a"; .byte 7
+.section .bar.9,"a"; .byte 9
+.section .bar.8,"a"; .byte 8
 
-.section .bar.4,"a"
- .quad 4
-
-.section .bar.6,"a"
- .quad 6
-
-.section .bar.5,"a"
- .quad 5
+.section .b11,"a"; .byte 11
+.section .b12,"a"; .byte 12
+.section .b10,"a"; .byte 10

diff  --git a/lld/test/ELF/linkerscript/wildcards.s b/lld/test/ELF/linkerscript/wildcards.s
index 43533564c081..c7fbcab44877 100644
--- a/lld/test/ELF/linkerscript/wildcards.s
+++ b/lld/test/ELF/linkerscript/wildcards.s
@@ -1,10 +1,11 @@
 # REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm -o %t.o
 
 ## Default case: abc and abx included in text.
 # RUN: echo "SECTIONS { \
 # RUN:      .text : { *(.abc .abx) } }" > %t.script
-# RUN: ld.lld -o %t.out --script %t.script %t
+# RUN: ld.lld -o %t.out --script %t.script %t.o
 # RUN: llvm-objdump --section-headers %t.out | \
 # RUN:   FileCheck -check-prefix=SEC-DEFAULT %s
 # SEC-DEFAULT:      Sections:
@@ -22,14 +23,14 @@
 ## Now replace the symbol with '?' and check that results are the same.
 # RUN: echo "SECTIONS { \
 # RUN:      .text : { *(.abc .ab?) } }" > %t.script
-# RUN: ld.lld -o %t.out --script %t.script %t
+# RUN: ld.lld -o %t.out --script %t.script %t.o
 # RUN: llvm-objdump --section-headers %t.out | \
 # RUN:   FileCheck -check-prefix=SEC-DEFAULT %s
 
 ## Now see how replacing '?' with '*' will consume whole abcd.
 # RUN: echo "SECTIONS { \
 # RUN:      .text : { *(.abc .ab*) } }" > %t.script
-# RUN: ld.lld -o %t.out --script %t.script %t
+# RUN: ld.lld -o %t.out --script %t.script %t.o
 # RUN: llvm-objdump --section-headers %t.out | \
 # RUN:   FileCheck -check-prefix=SEC-ALL %s
 # SEC-ALL:      Sections:
@@ -46,7 +47,7 @@
 ## All sections started with .a are merged.
 # RUN: echo "SECTIONS { \
 # RUN:      .text : { *(.a*) } }" > %t.script
-# RUN: ld.lld -o %t.out --script %t.script %t
+# RUN: ld.lld -o %t.out --script %t.script %t.o
 # RUN: llvm-objdump --section-headers %t.out | \
 # RUN:   FileCheck -check-prefix=SEC-NO %s
 # SEC-NO: Sections:
@@ -58,6 +59,7 @@
 # SEC-NO-NEXT:   4 .shstrtab     0000002a
 # SEC-NO-NEXT:   5 .strtab       00000008
 
+#--- asm
 .text
 .section .abc,"ax",@progbits
 .long 0
@@ -81,3 +83,11 @@
 
 .globl _start
 _start:
+
+#--- lparen.lds
+## ( is recognized as a section name pattern. Note, ( is rejected by GNU ld.
+# RUN: ld.lld -T %t/lparen.lds %t.o -o %t.out
+# RUN: llvm-objdump --section-headers %t.out | FileCheck --check-prefix=SEC-NO %s
+SECTIONS {
+ .text : { *(.a* ( ) }
+}