[lld] r295225 - Apply different tokenization rules to linker script expressions.
Rui Ueyama via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 15 11:58:18 PST 2017
Author: ruiu
Date: Wed Feb 15 13:58:17 2017
New Revision: 295225
URL: http://llvm.org/viewvc/llvm-project?rev=295225&view=rev
Log:
Apply different tokenization rules to linker script expressions.
The linker script lexer is context-sensitive. In the regular context,
arithmetic operator characters are regular characters, but in the
expression context, they are independent tokens. This affects how the
lexer tokenizes "3*4", for example. (This kind of expression is real;
the Linux kernel uses it.)
This patch defines function `maybeSplitExpr`. This function splits the
current token into multiple expression tokens if the lexer is in the
expression context.
Differential Revision: https://reviews.llvm.org/D29963
Modified:
lld/trunk/ELF/LinkerScript.cpp
lld/trunk/ELF/ScriptLexer.cpp
lld/trunk/ELF/ScriptLexer.h
lld/trunk/test/ELF/linkerscript/operators.s
Modified: lld/trunk/ELF/LinkerScript.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LinkerScript.cpp?rev=295225&r1=295224&r2=295225&view=diff
==============================================================================
--- lld/trunk/ELF/LinkerScript.cpp (original)
+++ lld/trunk/ELF/LinkerScript.cpp Wed Feb 15 13:58:17 2017
@@ -1610,9 +1610,7 @@ SymbolAssignment *ScriptParser::readAssi
Expr E;
assert(Op == "=" || Op == "+=");
if (consume("ABSOLUTE")) {
- // The RHS may be something like "ABSOLUTE(.) & 0xff".
- // Call readExpr1 to read the whole expression.
- E = readExpr1(readParenExpr(), 0);
+ E = readExpr();
E.IsAbsolute = [] { return true; };
} else {
E = readExpr();
@@ -1628,7 +1626,15 @@ SymbolAssignment *ScriptParser::readAssi
// This is an operator-precedence parser to parse a linker
// script expression.
-Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); }
+Expr ScriptParser::readExpr() {
+ // Our lexer is context-aware. Set the in-expression bit so that
+ // they apply different tokenization rules.
+ bool Orig = InExpr;
+ InExpr = true;
+ Expr E = readExpr1(readPrimary(), 0);
+ InExpr = Orig;
+ return E;
+}
static Expr combine(StringRef Op, Expr L, Expr R) {
auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); };
Modified: lld/trunk/ELF/ScriptLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/ScriptLexer.cpp?rev=295225&r1=295224&r2=295225&view=diff
==============================================================================
--- lld/trunk/ELF/ScriptLexer.cpp (original)
+++ lld/trunk/ELF/ScriptLexer.cpp Wed Feb 15 13:58:17 2017
@@ -26,18 +26,9 @@
// lookahead is labels in version scripts, where we need to parse "local :"
// as if "local:".
//
-// Overall, this lexer works fine for most linker scripts. There's room
-// for improving compatibility, but that's probably not at the top of our
-// todo list.
-//
-// A caveat: This lexer splits an input string into tokens ahead of time,
-// so the lexer is not context aware. There's one known corner case. Let's
-// say the next string is "val*3" (without quotes). In the context where
-// the parser is expecting an expression, that should be tokenizes to
-// "val", "*" and "3". In other context, it should be just a single
-// token. (If it is in a filename context, it'll be interpeted as a glob
-// pattern, for example.) We want to fix this, but it probably needs a
-// redesign of this lexer.
+// Overall, this lexer works fine for most linker scripts. There might
+// be room for improving compatibility, but that's probably not at the
+// top of our todo list.
//
//===----------------------------------------------------------------------===//
@@ -175,7 +166,60 @@ StringRef ScriptLexer::skipSpace(StringR
// An erroneous token is handled as if it were the last token before EOF.
bool ScriptLexer::atEOF() { return Error || Tokens.size() == Pos; }
+// Split a given string as an expression.
+// This function returns "3", "*" and "5" for "3*5" for example.
+static std::vector<StringRef> tokenizeExpr(StringRef S) {
+ StringRef Ops = "+-*/"; // List of operators
+
+ // Quoted strings are literal strings, so we don't want to split it.
+ if (S.startswith("\""))
+ return {S};
+
+ // Split S with +-*/ as separators.
+ std::vector<StringRef> Ret;
+ while (!S.empty()) {
+ size_t E = S.find_first_of(Ops);
+
+ // No need to split if there is no operator.
+ if (E == StringRef::npos) {
+ Ret.push_back(S);
+ break;
+ }
+
+ // Get a token before the operator.
+ if (E != 0)
+ Ret.push_back(S.substr(0, E));
+
+ // Get the operator as a token.
+ Ret.push_back(S.substr(E, 1));
+ S = S.substr(E + 1);
+ }
+ return Ret;
+}
+
+// In contexts where expressions are expected, the lexer should apply
+// different tokenization rules than the default one. By default,
+// arithmetic operator characters are regular characters, but in the
+// expression context, they should be independent tokens.
+//
+// For example, "foo*3" should be tokenized to "foo", "*" and "3" only
+// in the expression context.
+//
+// This function may split the current token into multiple tokens.
+void ScriptLexer::maybeSplitExpr() {
+ if (!InExpr || Error || atEOF())
+ return;
+
+ std::vector<StringRef> V = tokenizeExpr(Tokens[Pos]);
+ if (V.size() == 1)
+ return;
+ Tokens.erase(Tokens.begin() + Pos);
+ Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
+}
+
StringRef ScriptLexer::next() {
+ maybeSplitExpr();
+
if (Error)
return "";
if (atEOF()) {
Modified: lld/trunk/ELF/ScriptLexer.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/ScriptLexer.h?rev=295225&r1=295224&r2=295225&view=diff
==============================================================================
--- lld/trunk/ELF/ScriptLexer.h (original)
+++ lld/trunk/ELF/ScriptLexer.h Wed Feb 15 13:58:17 2017
@@ -36,10 +36,12 @@ public:
std::vector<MemoryBufferRef> MBs;
std::vector<StringRef> Tokens;
+ bool InExpr = false;
size_t Pos = 0;
bool Error = false;
private:
+ void maybeSplitExpr();
StringRef getLine();
size_t getLineNumber();
size_t getColumnNumber();
Modified: lld/trunk/test/ELF/linkerscript/operators.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/linkerscript/operators.s?rev=295225&r1=295224&r2=295225&view=diff
==============================================================================
--- lld/trunk/test/ELF/linkerscript/operators.s (original)
+++ lld/trunk/test/ELF/linkerscript/operators.s Wed Feb 15 13:58:17 2017
@@ -5,6 +5,7 @@
# RUN: minus = 5 - 1; \
# RUN: div = 6 / 2; \
# RUN: mul = 1 + 2 * 3; \
+# RUN: nospace = 1+2*6/3; \
# RUN: braces = 1 + (2 + 3) * 4; \
# RUN: and = 0xbb & 0xee; \
# RUN: ternary1 = 1 ? 1 : 2; \
@@ -32,6 +33,7 @@
# CHECK: 00000000000004 *ABS* 00000000 minus
# CHECK: 00000000000003 *ABS* 00000000 div
# CHECK: 00000000000007 *ABS* 00000000 mul
+# CHECK: 00000000000005 *ABS* 00000000 nospace
# CHECK: 00000000000015 *ABS* 00000000 braces
# CHECK: 000000000000aa *ABS* 00000000 and
# CHECK: 00000000000001 *ABS* 00000000 ternary1
More information about the llvm-commits
mailing list