[PATCH] [lld] Teach LLD how to parse complete linker scripts
Rui Ueyama
ruiu at google.com
Wed Oct 22 16:46:30 PDT 2014
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:70
@@ -41,1 +69,3 @@
+ lessequal,
+ greaterequal,
kw_entry,
----------------
Sort in asciii-betical order.
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:120
@@ -69,3 +119,3 @@
bool canStartName(char c) const;
bool canContinueName(char c) const;
void skipWhitespace();
----------------
It should be done in a different patch, but these can* member functions have nothing to do with the Lexer class (they don't use member variables of the class etc). These should be non-member function.
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:157
@@ +156,3 @@
+ InputSectionFile,
+ Overlay,
+ };
----------------
Sort.
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:322
@@ +321,3 @@
+
+ virtual ~Expression() {}
+
----------------
Remove three blank lines above.
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:333
@@ +332,3 @@
+public:
+ Constant(uint64_t num) : Expression(Kind::Constant), _num(num) {}
+ void dump(raw_ostream &os) const override;
----------------
Is constant always unsigned?
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:392
@@ +391,3 @@
+ CompareEqual,
+ CompareDifferent
+ };
----------------
sort
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:467
@@ +466,3 @@
+/// names is important to determine which sections go first.
+enum WildcardSortMode {
+ WSM_NA,
----------------
Can you make this enum class?
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:478
@@ +477,3 @@
+/// Represents either a single input section name or a group of sorted input
+/// section names. They specify which sections to map to a given output section.
+class InputSection {
----------------
Add an example to the comment. My understanding of this class is it is representing ".text" or "SORT(.text*)" in
SECTIONS {
.x: { .text }
.y { SORT(.text*) }
}
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:535
@@ +534,3 @@
+/// An output-section-command that maps a series of sections inside a given
+/// file to an output section.
+class InputSectionFile : public Command {
----------------
example here.
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:567
@@ +566,3 @@
+/// A sections-command to specify which input sections compose a given output
+/// section.
+class OutputSectionDescription : public Command {
----------------
example here
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:605
@@ +604,3 @@
+
+class Overlay : public Command {
+public:
----------------
Or, instead of adding example here, a link to the documentation is better?
For this, https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:615
@@ +614,3 @@
+
+private:
+};
----------------
Remove
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:675
@@ +674,3 @@
+ /// requires lexing additional tokens, store them in a private buffer.
+ const Token &peek(unsigned n) {
+ // Covers the case where the look ahead buffer contains the requested token
----------------
In this patch you pass only 1 to this function. Do you really need to look ahead arbitrary number of tokens? The buffer management looks a bit too complicated to me.
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:756
@@ +755,3 @@
+
+ /// Parse "identifier(param [, param]...)"
+ ///
----------------
You want to mention that identifier and "(" were already read.
================
Comment at: include/lld/ReaderWriter/LinkerScript.h:793
@@ +792,3 @@
+ /// \p LHS points to the left-hand-side operand of this operator
+ /// \p maxPrecedence has the maximum operator precedence level that this parse
+ /// function is allowed to consume.
----------------
I think we don't usually use \p notation.
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:85
@@ -37,1 +84,3 @@
+ CASE(greaterequal)
+ CASE(unknown)
#undef CASE
----------------
sort
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:96
@@ +95,3 @@
+ if (c < '0' || c > '9')
+ return llvm::ErrorOr<uint64_t>(std::make_error_code(std::errc::io_error));
+ res += c - '0';
----------------
I think there's an implicit conversion from std::error_code to llvm::ErrorOr. So you can just write
return std::errc::io_error
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:99
@@ +98,3 @@
+ }
+ return llvm::ErrorOr<uint64_t>(res);
+}
----------------
return res;
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:134
@@ +133,3 @@
+ res += c - 'a' + 10;
+ else
+ res += c - 'A' + 10;
----------------
You can write
else if (c >= 'A' && c <= 'F')
res += c - 'A' + 10;
else
return llvm::ErrorOr<uint64_t>(std::make_error_code(std::errc::io_error));
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:147
@@ +146,3 @@
+ .StartsWith("0", octal)
+ .Default(decimal);
+
----------------
This needs to be case insensitive.
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:156
@@ +155,3 @@
+ str = str.drop_back();
+ }
+
----------------
Instead of using two boolean values, you can define variable multiplier and set 1024 for K or 1024*1024 for M.
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:171
@@ +170,3 @@
+ str = str.drop_back();
+ }
+
----------------
Huh, I didn't know that the linker script supports this way of writing a number. Good to know.
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:176
@@ +175,3 @@
+ case hex:
+ if (str.startswith("0x"))
+ str = str.drop_front(2);
----------------
startswith_lower
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:190
@@ +189,3 @@
+ }
+ if (!res.getError()) {
+ if (suffixK)
----------------
Use early return here.
if (res.getError())
return res;
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:194
@@ +193,3 @@
+ else if (suffixM)
+ *res = *res << 20;
+ }
----------------
and multiply by the multiplier
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:211
@@ +210,3 @@
+ case '8':
+ case '9':
+ return true;
----------------
return '0' <= c && c <= '9'
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:253
@@ +252,3 @@
+ case 'M':
+ case 'K':
+ return true;
----------------
You want to write multiple cases in one line. Or if expression may be shorter.
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:325
@@ +324,3 @@
+ tok = Token(_buffer.substr(0, 1), Token::l_brace);
+ _buffer = _buffer.drop_front();
+ return;
----------------
I think we need a function something like
drop(StringRef &s, int n)
that (destructively) drops n characters from s and returns them.
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:456
@@ -110,3 +455,3 @@
// simplicity.
if ((_buffer[0] == '\"') || (_buffer[0] == '\'')) {
char c = _buffer[0];
----------------
This line should be before default and
case '"': case '\'':
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:469
@@ +468,3 @@
+ if ((_buffer.startswith("0x") && _buffer.size() > 2 &&
+ canContinueNumber(_buffer[2])) ||
+ canStartNumber(_buffer[0])) {
----------------
This expression
(_buffer.startswith("0x") && _buffer.size() > 2 && canContinueNumber(_buffer[2])
seems redundant, because if a string starts with "0x", it will always satisfy
canStartNumber(_buffer[0])
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:552
@@ +551,3 @@
+ .Case("ONLY_IF_RW", Token::kw_only_if_rw)
+ .Case("/DISCARD/", Token::kw_discard)
+ .Case("ENTRY", Token::kw_entry)
----------------
Sort
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:748
@@ +747,3 @@
+ numParen = 1;
+ break;
+ case WSM_ByAlignment:
----------------
return 1;
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:758
@@ +757,3 @@
+ case WSM_ByNameAndAlignment:
+ os << "SORT_BY_NAME(SORT_BY_ALIGNMENT";
+ numParen = 2;
----------------
Did you forget to add the trailing "("?
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:762
@@ +761,3 @@
+ case WSM_ByAlignmentAndName:
+ os << "SORT_BY_ALIGNMENT(SORT_BY_NAME";
+ numParen = 2;
----------------
Ditto
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:860
@@ +859,3 @@
+
+ os << " { \n";
+ for (auto &command : _outputSectionCommands) {
----------------
remove space before \n
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1002
@@ +1001,3 @@
+ switch (peek(1)._kind) {
+ case Token::l_paren:
+ return parseFunctionCall();
----------------
This can be
if (peek(1)._kind == Token::l_paren)
return parseFunctionCall();
Symbol *sym = ...
...
return sym;
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1123
@@ +1122,3 @@
+
+const Expression *Parser::parseOperatorOperandLoop(const Expression *LHS,
+ unsigned highestPrecedence) {
----------------
s/LHS/lhs/
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1141
@@ +1140,3 @@
+ consumeToken();
+ const Expression *RHS = parseExpression(precedence - 1);
+ if (!RHS)
----------------
s/RHS/rhs/
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1149
@@ +1148,3 @@
+
+const Expression *Parser::parseTernaryCondOp(const Expression *LHS) {
+ assert(_tok._kind == Token::question && "Expected question mark");
----------------
lhs
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1334
@@ +1333,3 @@
+ if (_tok._kind == Token::kw_hidden || _tok._kind == Token::kw_provide ||
+ _tok._kind == Token::kw_provide_hidden) {
+ switch (_tok._kind) {
----------------
Remove this if
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1361
@@ +1360,3 @@
+ kind = SymbolAssignment::Simple;
+ consumeToken();
+ break;
----------------
Move consumeToken()s after this switch statement.
================
Comment at: lib/ReaderWriter/LinkerScript.cpp:1589
@@ +1588,3 @@
+ archiveSortMode, inputSections);
+ ;
+ consumeToken();
----------------
remove
http://reviews.llvm.org/D5852
More information about the llvm-commits
mailing list