[PATCH] [lld] Teach LLD how to parse complete linker scripts

Wed Oct 22 16:46:30 PDT 2014

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:70
@@ -41,1 +69,3 @@
+    lessequal,
+    greaterequal,
     kw_entry,
----------------
Sort in asciii-betical order.

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:120
@@ -69,3 +119,3 @@
   bool canStartName(char c) const;
   bool canContinueName(char c) const;
   void skipWhitespace();
----------------
It should be done in a different patch, but these can* member functions have nothing to do with the Lexer class (they don't use member variables of the class etc). These should be non-member function.

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:157
@@ +156,3 @@
+    InputSectionFile,
+    Overlay,
+  };
----------------
Sort.

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:322
@@ +321,3 @@
+
+  virtual ~Expression() {}
+
----------------
Remove three blank lines above.

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:333
@@ +332,3 @@
+public:
+  Constant(uint64_t num) : Expression(Kind::Constant), _num(num) {}
+  void dump(raw_ostream &os) const override;
----------------
Is constant always unsigned?

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:392
@@ +391,3 @@
+    CompareEqual,
+    CompareDifferent
+  };
----------------
sort

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:467
@@ +466,3 @@
+/// names is important to determine which sections go first.
+enum WildcardSortMode {
+  WSM_NA,
----------------
Can you make this enum class?

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:478
@@ +477,3 @@
+/// Represents either a single input section name or a group of sorted input
+/// section names. They specify which sections to map to a given output section.
+class InputSection {
----------------
Add an example to the comment. My understanding of this class is it is representing ".text" or "SORT(.text*)" in 

  SECTIONS {
    .x: {  .text }
    .y { SORT(.text*) }
  }

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:535
@@ +534,3 @@
+/// An output-section-command that maps a series of sections inside a given
+/// file to an output section.
+class InputSectionFile : public Command {
----------------
example here.

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:567
@@ +566,3 @@
+/// A sections-command to specify which input sections compose a given output
+/// section.
+class OutputSectionDescription : public Command {
----------------
example here

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:605
@@ +604,3 @@
+
+class Overlay : public Command {
+public:
----------------
Or, instead of adding example here, a link to the documentation is better?

For this, https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:615
@@ +614,3 @@
+
+private:
+};
----------------
Remove

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:675
@@ +674,3 @@
+  /// requires lexing additional tokens, store them in a private buffer.
+  const Token &peek(unsigned n) {
+    // Covers the case where the look ahead buffer contains the requested token
----------------
In this patch you pass only 1 to this function. Do you really need to look ahead arbitrary number of tokens? The buffer management looks a bit too complicated to me.

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:756
@@ +755,3 @@
+
+  /// Parse "identifier(param [, param]...)"
+  ///
----------------
You want to mention that identifier and "(" were already read.

================
Comment at: include/lld/ReaderWriter/LinkerScript.h:793
@@ +792,3 @@
+  /// \p LHS points to the left-hand-side operand of this operator
+  /// \p maxPrecedence has the maximum operator precedence level that this parse
+  /// function is allowed to consume.
----------------
I think we don't usually use \p notation.

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:85
@@ -37,1 +84,3 @@
+    CASE(greaterequal)
+    CASE(unknown)
 #undef CASE
----------------
sort

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:96
@@ +95,3 @@
+    if (c < '0' || c > '9')
+      return llvm::ErrorOr<uint64_t>(std::make_error_code(std::errc::io_error));
+    res += c - '0';
----------------
I think there's an implicit conversion from std::error_code to llvm::ErrorOr. So you can just write

  return std::errc::io_error


================
Comment at: lib/ReaderWriter/LinkerScript.cpp:99
@@ +98,3 @@
+  }
+  return llvm::ErrorOr<uint64_t>(res);
+}
----------------
  return res;

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:134
@@ +133,3 @@
+      res += c - 'a' + 10;
+    else
+      res += c - 'A' + 10;
----------------
You can write

  else if (c >= 'A' && c <= 'F')
    res += c - 'A' + 10;
  else
    return llvm::ErrorOr<uint64_t>(std::make_error_code(std::errc::io_error));


================
Comment at: lib/ReaderWriter/LinkerScript.cpp:147
@@ +146,3 @@
+                     .StartsWith("0", octal)
+                     .Default(decimal);
+
----------------
This needs to be case insensitive.

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:156
@@ +155,3 @@
+    str = str.drop_back();
+  }
+
----------------
Instead of using two boolean values, you can define variable multiplier and set 1024 for K or 1024*1024 for M.

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:171
@@ +170,3 @@
+    str = str.drop_back();
+  }
+
----------------
Huh, I didn't know that the linker script supports this way of writing a number. Good to know.

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:176
@@ +175,3 @@
+  case hex:
+    if (str.startswith("0x"))
+      str = str.drop_front(2);
----------------
startswith_lower

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:190
@@ +189,3 @@
+  }
+  if (!res.getError()) {
+    if (suffixK)
----------------
Use early return here.

  if (res.getError())
     return res;

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:194
@@ +193,3 @@
+    else if (suffixM)
+      *res = *res << 20;
+  }
----------------
and multiply by the multiplier

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:211
@@ +210,3 @@
+  case '8':
+  case '9':
+    return true;
----------------
  return '0' <= c && c <= '9'

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:253
@@ +252,3 @@
+  case 'M':
+  case 'K':
+    return true;
----------------
You want to write multiple cases in one line. Or if expression may be shorter.

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:325
@@ +324,3 @@
+    tok = Token(_buffer.substr(0, 1), Token::l_brace);
+    _buffer = _buffer.drop_front();
+    return;
----------------
I think we need a function something like

  drop(StringRef &s, int n)

that (destructively) drops n characters from s and returns them.

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:456
@@ -110,3 +455,3 @@
     // simplicity.
     if ((_buffer[0] == '\"') || (_buffer[0] == '\'')) {
       char c = _buffer[0];
----------------
This line should be before default and 

  case '"': case '\'':

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:469
@@ +468,3 @@
+    if ((_buffer.startswith("0x") && _buffer.size() > 2 &&
+         canContinueNumber(_buffer[2])) ||
+        canStartNumber(_buffer[0])) {
----------------
This expression

  (_buffer.startswith("0x") && _buffer.size() > 2 && canContinueNumber(_buffer[2])

seems redundant, because if a string starts with "0x", it will always satisfy

  canStartNumber(_buffer[0])

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:552
@@ +551,3 @@
+            .Case("ONLY_IF_RW", Token::kw_only_if_rw)
+            .Case("/DISCARD/", Token::kw_discard)
+            .Case("ENTRY", Token::kw_entry)
----------------
Sort

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:748
@@ +747,3 @@
+    numParen = 1;
+    break;
+  case WSM_ByAlignment:
----------------
  return 1;

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:758
@@ +757,3 @@
+  case WSM_ByNameAndAlignment:
+    os << "SORT_BY_NAME(SORT_BY_ALIGNMENT";
+    numParen = 2;
----------------
Did you forget to add the trailing "("?

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:762
@@ +761,3 @@
+  case WSM_ByAlignmentAndName:
+    os << "SORT_BY_ALIGNMENT(SORT_BY_NAME";
+    numParen = 2;
----------------
Ditto

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:860
@@ +859,3 @@
+
+  os << "  { \n";
+  for (auto &command : _outputSectionCommands) {
----------------
remove space before \n

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1002
@@ +1001,3 @@
+    switch (peek(1)._kind) {
+    case Token::l_paren:
+      return parseFunctionCall();
----------------
This can be

  if (peek(1)._kind == Token::l_paren)
    return parseFunctionCall();
  Symbol *sym = ...
  ...
  return sym;

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1123
@@ +1122,3 @@
+
+const Expression *Parser::parseOperatorOperandLoop(const Expression *LHS,
+                                                   unsigned highestPrecedence) {
----------------
s/LHS/lhs/

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1141
@@ +1140,3 @@
+    consumeToken();
+    const Expression *RHS = parseExpression(precedence - 1);
+    if (!RHS)
----------------
s/RHS/rhs/

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1149
@@ +1148,3 @@
+
+const Expression *Parser::parseTernaryCondOp(const Expression *LHS) {
+  assert(_tok._kind == Token::question && "Expected question mark");
----------------
lhs

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1334
@@ +1333,3 @@
+  if (_tok._kind == Token::kw_hidden || _tok._kind == Token::kw_provide ||
+      _tok._kind == Token::kw_provide_hidden) {
+    switch (_tok._kind) {
----------------
Remove this if

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1361
@@ +1360,3 @@
+    kind = SymbolAssignment::Simple;
+    consumeToken();
+    break;
----------------
Move consumeToken()s after this switch statement.

================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1589
@@ +1588,3 @@
+                         archiveSortMode, inputSections);
+  ;
+  consumeToken();
----------------
remove

http://reviews.llvm.org/D5852