[lld] [ELF] Added `struct Token` and changed `next()` and `peek()` to return Token (PR #100180)
Daniel Thornburgh via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 24 10:56:14 PDT 2024
================
@@ -155,15 +171,200 @@ void ScriptLexer::tokenize(MemoryBufferRef mb) {
// A character that cannot start a word (which is usually a
// punctuation) forms a single character token.
- if (pos == 0)
+ if (pos == 0) {
pos = 1;
- vec.push_back(s.substr(0, pos));
+ vec.push_back(getOperatorToken(s));
+ } else {
+ vec.push_back(getKeywordorIdentifier(s.substr(0, pos)));
+ }
s = s.substr(pos);
}
tokens.insert(tokens.begin() + pos, vec.begin(), vec.end());
}
+ScriptLexer::Token ScriptLexer::getOperatorToken(StringRef s) { // Classifies the leading character(s) of `s` and returns them as a single Token.
+ auto createToken = [&](Tok kind, size_t pos) -> Token { // Pairs `kind` with the first `pos` characters of `s`.
+ return {kind, s.substr(0, pos)};
+ };
+
+ switch (s.front()) { // Dispatch on the first character. NOTE(review): presumably callers guarantee `s` is non-empty - confirm.
+ case EOF: // NOTE(review): EOF is an int macro; s.front() presumably yields a char - confirm this label can match as intended.
+ return createToken(Tok::Eof, 0); // The Eof token carries zero-length text.
+ case '(':
+ return createToken(Tok::LeftParenthesis, 1);
+ case ')':
+ return createToken(Tok::RightParenthesis, 1);
+ case '{':
+ return createToken(Tok::LeftCurlyBracket, 1);
+ case '}':
+ return createToken(Tok::RightCurlyBracket, 1);
+ case ';':
+ return createToken(Tok::Semicolon, 1);
+ case ',':
+ return createToken(Tok::Comma, 1);
+ case ':':
+ return createToken(Tok::Colon, 1);
+ case '?':
+ return createToken(Tok::Question, 1);
+ case '%':
+ return createToken(Tok::Percent, 1);
+ case '!':
+ if (s.size() > 1 && s[1] == '=') // Size check guards s[1]; the two-character "!=" is tried before the bare "!".
+ return createToken(Tok::NotEqual, 2);
+ return createToken(Tok::Excalamation, 1); // NOTE(review): "Excalamation" looks like a misspelling of "Exclamation"; the Tok enum is declared outside this hunk.
+ case '*':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Tok::MulAssign, 2);
+ return createToken(Tok::Asterisk, 1);
+ case '/':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Tok::DivAssign, 2);
+ return createToken(Tok::Slash, 1);
+ case '=':
+ if (s.size() > 1 && s[1] == '=') // "==" is the comparison (Tok::Equal); a lone "=" is assignment.
+ return createToken(Tok::Equal, 2);
+ return createToken(Tok::Assign, 1);
+ case '+':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Tok::PlusAssign, 2);
+ return createToken(Tok::Plus, 1);
+ case '-':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Tok::MinusAssign, 2);
+ return createToken(Tok::Minus, 1);
+ case '<':
+ if (s.size() > 2 && s[1] == s[0] && s[2] == '=') // Longest match first: "<<=" before "<=", "<<" and "<".
+ return createToken(Tok::LeftShiftAssign, 3);
+ if (s.size() > 1) {
+ if (s[1] == '=')
+ return createToken(Tok::LessEqual, 2);
+ if (s[1] == '<')
+ return createToken(Tok::LeftShift, 2);
+ }
+ return createToken(Tok::Less, 1);
+ case '>':
+ if (s.size() > 2 && s[1] == s[0] && s[2] == '=') // Longest match first: ">>=" before ">=", ">>" and ">".
+ return createToken(Tok::RightShiftAssign, 3);
+ if (s.size() > 1) {
+ if (s[1] == '=')
+ return createToken(Tok::GreaterEqual, 2);
+ if (s[1] == '>')
+ return createToken(Tok::RightShift, 2);
+ }
+ return createToken(Tok::Greater, 1);
+ case '&':
+ if (s.size() > 1) { // Two-character forms "&=" and "&&" are tried before the bare "&".
+ if (s[1] == '=')
+ return createToken(Tok::AndAssign, 2);
+ if (s[1] == '&')
+ return createToken(Tok::LogicalAnd, 2);
+ }
+ return createToken(Tok::BitwiseAnd, 1);
+ case '^':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Tok::XorAssign, 2);
+ return createToken(Tok::BitwiseXor, 1);
+ case '|':
+ if (s.size() > 1) { // Two-character forms "|=" and "||" are tried before the bare "|".
+ if (s[1] == '=')
+ return createToken(Tok::OrAssign, 2);
+ if (s[1] == '|')
+ return createToken(Tok::LogicalOr, 2);
+ }
+ return createToken(Tok::BitwiseOr, 1);
+ case '.':
+ return createToken(Tok::Dot, 1);
+ case '_':
+ return createToken(Tok::Underscore, 1);
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return createToken(Tok::Decimal, 1); // Only the first digit is taken (length 1), whatever follows it in `s`.
+ default:
+ return {Tok::Identifier, s}; // Any other leading character: all of `s` becomes one Identifier token.
+ }
+}
+
+const llvm::StringMap<Tok> ScriptLexer::keywordTokMap = {
+ {"ENTRY", Tok::Entry},
+ {"INPUT", Tok::Input},
+ {"GROUP", Tok::Group},
+ {"INCLUDE", Tok::Include},
+ {"MEMORY", Tok::Memory},
+ {"OUTPUT", Tok::Output},
+ {"SEARCH_DIR", Tok::SearchDir},
+ {"STARTUP", Tok::Startup},
+ {"INSERT", Tok::Insert},
+ {"AFTER", Tok::After},
+ {"OUTPUT_FORMAT", Tok::OutputFormat},
+ {"TARGET", Tok::Target},
+ {"ASSERT", Tok::Assert},
+ {"CONSTANT", Tok::Constant},
+ {"EXTERN", Tok::Extern},
+ {"OUTPUT_ARCH", Tok::OutputArch},
+ {"NOCROSSREFS", Tok::Nocrossrefs},
+ {"NOCROSSREFS_TO", Tok::NocrossrefsTo},
+ {"PROVIDE", Tok::Provide},
+ {"HIDDEN", Tok::Hidden},
+ {"PROVIDE_HIDDEN", Tok::ProvideHidden},
+ {"SECTIONS", Tok::Sections},
+ {"BEFORE", Tok::Before},
+ {"EXCLUDE_FILE", Tok::ExcludeFile},
+ {"KEEP", Tok::Keep},
+ {"INPUT_SECTION_FLAGS", Tok::InputSectionFlags},
+ {"OVERLAY", Tok::Overlay},
+ {"NOLOAD", Tok::Noload},
+ {"COPY", Tok::Copy},
+ {"INFO", Tok::Info},
+ {"OVERWRITE_SECTIONS", Tok::OverwriteSections},
+ {"SUBALIGN", Tok::Subalign},
+ {"ONLY_IF_RO", Tok::OnlyIfRO},
----------------
mysterymath wrote:
These should be named OnlyIfRo and OnlyIfRw, to follow the naming convention of the others (the underscores in the keyword separate the words, and each word is capitalized as a unit).
https://github.com/llvm/llvm-project/pull/100180
More information about the llvm-commits
mailing list