[lld] [ELF] ScriptLexer: generate tokens lazily (PR #100493)
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 25 12:31:40 PDT 2024
================
@@ -191,92 +205,31 @@ StringRef ScriptLexer::skipSpace(StringRef s) {
}
// An erroneous token is handled as if it were the last token before EOF.
-bool ScriptLexer::atEOF() { return errorCount() || tokens.size() == pos; }
-
-// Split a given string as an expression.
-// This function returns "3", "*" and "5" for "3*5" for example.
-static std::vector<StringRef> tokenizeExpr(StringRef s) {
- StringRef ops = "!~*/+-<>?^:="; // List of operators
-
- // Quoted strings are literal strings, so we don't want to split it.
- if (s.starts_with("\""))
- return {s};
-
- // Split S with operators as separators.
- std::vector<StringRef> ret;
- while (!s.empty()) {
- size_t e = s.find_first_of(ops);
-
- // No need to split if there is no operator.
- if (e == StringRef::npos) {
- ret.push_back(s);
- break;
- }
-
- // Get a token before the operator.
- if (e != 0)
- ret.push_back(s.substr(0, e));
-
- // Get the operator as a token.
- // Keep !=, ==, >=, <=, << and >> operators as a single tokens.
- if (s.substr(e).starts_with("!=") || s.substr(e).starts_with("==") ||
- s.substr(e).starts_with(">=") || s.substr(e).starts_with("<=") ||
- s.substr(e).starts_with("<<") || s.substr(e).starts_with(">>")) {
- ret.push_back(s.substr(e, 2));
- s = s.substr(e + 2);
- } else {
- ret.push_back(s.substr(e, 1));
- s = s.substr(e + 1);
- }
- }
- return ret;
-}
-
-// In contexts where expressions are expected, the lexer should apply
-// different tokenization rules than the default one. By default,
-// arithmetic operator characters are regular characters, but in the
-// expression context, they should be independent tokens.
-//
-// For example, "foo*3" should be tokenized to "foo", "*" and "3" only
-// in the expression context.
-//
-// This function may split the current token into multiple tokens.
-void ScriptLexer::maybeSplitExpr() {
- if (!inExpr || errorCount() || atEOF())
- return;
-
- std::vector<StringRef> v = tokenizeExpr(tokens[pos]);
- if (v.size() == 1)
- return;
- tokens.erase(tokens.begin() + pos);
- tokens.insert(tokens.begin() + pos, v.begin(), v.end());
-}
+bool ScriptLexer::atEOF() { return eof || errorCount(); }
StringRef ScriptLexer::next() {
- maybeSplitExpr();
-
if (errorCount())
return "";
- if (atEOF()) {
- setError("unexpected EOF");
- return "";
- }
- return tokens[pos++];
+ prevTok = peek();
+ return std::exchange(curTok, StringRef(cur.s.data(), 0));
}
StringRef ScriptLexer::peek() {
- StringRef tok = next();
- if (errorCount())
- return "";
- pos = pos - 1;
- return tok;
+ // curTok is invalid if curTokState and inExpr mismatch.
----------------
MaskRay wrote:
Thanks for the suggestion!
https://github.com/llvm/llvm-project/pull/100493
More information about the llvm-commits
mailing list