[lld] 5c3c0a8 - [ELF] Replace inExpr with lexState. NFC
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 1 15:49:13 PST 2025
Author: Fangrui Song
Date: 2025-02-01T15:49:08-08:00
New Revision: 5c3c0a8cec9205efba78583bd0b8a646270968a1
URL: https://github.com/llvm/llvm-project/commit/5c3c0a8cec9205efba78583bd0b8a646270968a1
DIFF: https://github.com/llvm/llvm-project/commit/5c3c0a8cec9205efba78583bd0b8a646270968a1.diff
LOG: [ELF] Replace inExpr with lexState. NFC
We may add another state State::Wild to behave more like GNU ld.
Added:
Modified:
lld/ELF/ScriptLexer.cpp
lld/ELF/ScriptLexer.h
lld/ELF/ScriptParser.cpp
Removed:
################################################################################
diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index e0adf2e5b0c8cc..8db44f55505f84 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -105,7 +105,7 @@ void ScriptLexer::lex() {
curBuf = buffers.pop_back_val();
continue;
}
- curTokState = inExpr;
+ curTokState = lexState;
// Quoted token. Note that double-quote characters are parts of a token
// because, in a glob match context, only unquoted tokens are interpreted
@@ -142,7 +142,13 @@ void ScriptLexer::lex() {
// C-like languages, so that you can write "file-name.cpp" as one bare
// token.
size_t pos;
- if (inExpr) {
+ switch (lexState) {
+ case State::Script:
+ pos = s.find_first_not_of(
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+ "0123456789_.$/\\~=+[]*?-!^:");
+ break;
+ case State::Expr:
pos = s.find_first_not_of(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
"0123456789_.$");
@@ -150,10 +156,7 @@ void ScriptLexer::lex() {
((s[0] == s[1] && strchr("<>&|", s[0])) ||
is_contained({"==", "!=", "<=", ">=", "<<", ">>"}, s.substr(0, 2))))
pos = 2;
- } else {
- pos = s.find_first_not_of(
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
- "0123456789_.$/\\~=+[]*?-!^:");
+ break;
}
if (pos == 0)
@@ -208,8 +211,8 @@ StringRef ScriptLexer::next() {
}
StringRef ScriptLexer::peek() {
- // curTok is invalid if curTokState and inExpr mismatch.
- if (curTok.size() && curTokState != inExpr) {
+ // curTok is invalid if curTokState and lexState mismatch.
+ if (curTok.size() && curTokState != lexState) {
curBuf.s = StringRef(curTok.data(), curBuf.s.end() - curTok.data());
curTok = {};
}
diff --git a/lld/ELF/ScriptLexer.h b/lld/ELF/ScriptLexer.h
index d689a7e108f93e..be691022f53854 100644
--- a/lld/ELF/ScriptLexer.h
+++ b/lld/ELF/ScriptLexer.h
@@ -41,6 +41,11 @@ class ScriptLexer {
// Used to detect INCLUDE() cycles.
llvm::DenseSet<StringRef> activeFilenames;
+ enum class State {
+ Script,
+ Expr,
+ };
+
struct Token {
StringRef str;
explicit operator bool() const { return !str.empty(); }
@@ -54,8 +59,9 @@ class ScriptLexer {
// expression state changes.
StringRef curTok;
size_t prevTokLine = 1;
- // The inExpr state when curTok is cached.
- bool curTokState = false;
+ // The lex state when curTok is cached.
+ State curTokState = State::Script;
+ State lexState = State::Script;
bool eof = false;
public:
@@ -75,7 +81,6 @@ class ScriptLexer {
MemoryBufferRef getCurrentMB();
std::vector<MemoryBufferRef> mbs;
- bool inExpr = false;
private:
StringRef getLine();
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 06a22613ee93ac..a10af9565a1d63 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -289,7 +289,7 @@ void ScriptParser::readLinkerScript() {
void ScriptParser::readDefsym() {
if (errCount(ctx))
return;
- inExpr = true;
+ SaveAndRestore saved(lexState, State::Expr);
StringRef name = readName();
expect("=");
Expr e = readExpr();
@@ -954,8 +954,8 @@ bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok) {
// https://sourceware.org/binutils/docs/ld/Output-Section-Type.html
void ScriptParser::readSectionAddressType(OutputSection *cmd) {
if (consume("(")) {
- // Temporarily set inExpr to support TYPE=<value> without spaces.
- SaveAndRestore saved(inExpr, true);
+ // Temporarily set lexState to support TYPE=<value> without spaces.
+ SaveAndRestore saved(lexState, State::Expr);
if (readSectionDirective(cmd, peek()))
return;
cmd->addrExpr = readExpr();
@@ -965,7 +965,7 @@ void ScriptParser::readSectionAddressType(OutputSection *cmd) {
}
if (consume("(")) {
- SaveAndRestore saved(inExpr, true);
+ SaveAndRestore saved(lexState, State::Expr);
StringRef tok = peek();
if (!readSectionDirective(cmd, tok))
setError("unknown section directive: " + tok);
@@ -1087,10 +1087,10 @@ OutputDesc *ScriptParser::readOutputSectionDescription(StringRef outSec) {
osec->phdrs = readOutputSectionPhdrs();
if (peek() == "=" || peek().starts_with("=")) {
- inExpr = true;
+ lexState = State::Expr;
consume("=");
osec->filler = readFill();
- inExpr = false;
+ lexState = State::Script;
}
// Consume optional comma following output section command.
@@ -1162,7 +1162,7 @@ SymbolAssignment *ScriptParser::readAssignment(StringRef tok) {
bool savedSeenRelroEnd = ctx.script->seenRelroEnd;
const StringRef op = peek();
{
- SaveAndRestore saved(inExpr, true);
+ SaveAndRestore saved(lexState, State::Expr);
if (op.starts_with("=")) {
// Support = followed by an expression without whitespace.
cmd = readSymbolAssignment(unquote(tok));
@@ -1235,7 +1235,7 @@ SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) {
Expr ScriptParser::readExpr() {
// Our lexer is context-aware. Set the in-expression bit so that
// they apply different tokenization rules.
- SaveAndRestore saved(inExpr, true);
+ SaveAndRestore saved(lexState, State::Expr);
Expr e = readExpr1(readPrimary(), 0);
return e;
}
@@ -1452,12 +1452,11 @@ std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() {
StringRef ScriptParser::readParenName() {
expect("(");
- bool orig = inExpr;
- inExpr = false;
- StringRef tok = readName();
- inExpr = orig;
+ auto saved = std::exchange(lexState, State::Script);
+ StringRef name = readName();
+ lexState = saved;
expect(")");
- return tok;
+ return name;
}
static void checkIfExists(LinkerScript &script, const OutputSection &osec,
More information about the llvm-commits
mailing list