[lld] [ELF] Added `struct Token` and changed `next()` and `peek()` to return Token (PR #100180)
Daniel Thornburgh via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 24 10:56:15 PDT 2024
================
@@ -155,15 +171,200 @@ void ScriptLexer::tokenize(MemoryBufferRef mb) {
// A character that cannot start a word (which is usually a
// punctuation) forms a single character token.
- if (pos == 0)
+ if (pos == 0) {
pos = 1;
- vec.push_back(s.substr(0, pos));
+ vec.push_back(getOperatorToken(s));
+ } else {
+ vec.push_back(getKeywordorIdentifier(s.substr(0, pos)));
+ }
s = s.substr(pos);
}
tokens.insert(tokens.begin() + pos, vec.begin(), vec.end());
}
+// Classifies the punctuation/operator token found at the very start of `s`.
+//
+// The longest matching operator wins ("<<=" over "<<" over "<"), and the
+// returned token's text is exactly that leading slice of `s`. A leading
+// character that has no dedicated case yields a one-character
+// Tok::Identifier token, and an empty `s` yields an empty Tok::Eof token.
+//
+// NOTE(review): the returned text can be 2 or 3 characters long, so a caller
+// that always advances its input by one character will lex the tail of such
+// an operator a second time. Callers should advance by the token's length.
+ScriptLexer::Token ScriptLexer::getOperatorToken(StringRef s) {
+  // Builds a token of the given kind from the first `len` characters of `s`.
+  auto createToken = [&](Tok kind, size_t len) -> Token {
+    return {kind, s.substr(0, len)};
+  };
+
+  // front() must not be called on an empty string; treat "nothing left" as
+  // end of input, matching the empty token the EOF case below produces.
+  if (s.empty())
+    return createToken(Tok::Eof, 0);
+
+  switch (s.front()) {
+  // NOTE(review): EOF is an int macro while s.front() is a char, so this can
+  // only match where char is signed and the byte equals EOF (typically 0xFF
+  // for EOF == -1). Kept as-is; confirm whether it is really wanted.
+  case EOF:
+    return createToken(Tok::Eof, 0);
+  case '(':
+    return createToken(Tok::LeftParenthesis, 1);
+  case ')':
+    return createToken(Tok::RightParenthesis, 1);
+  case '{':
+    return createToken(Tok::LeftCurlyBracket, 1);
+  case '}':
+    return createToken(Tok::RightCurlyBracket, 1);
+  case ';':
+    return createToken(Tok::Semicolon, 1);
+  case ',':
+    return createToken(Tok::Comma, 1);
+  case ':':
+    return createToken(Tok::Colon, 1);
+  case '?':
+    return createToken(Tok::Question, 1);
+  case '%':
+    return createToken(Tok::Percent, 1);
+  case '!':
+    if (s.size() > 1 && s[1] == '=')
+      return createToken(Tok::NotEqual, 2);
+    return createToken(Tok::Excalamation, 1);
+  case '*':
+    if (s.size() > 1 && s[1] == '=')
+      return createToken(Tok::MulAssign, 2);
+    return createToken(Tok::Asterisk, 1);
+  case '/':
+    if (s.size() > 1 && s[1] == '=')
+      return createToken(Tok::DivAssign, 2);
+    return createToken(Tok::Slash, 1);
+  case '=':
+    if (s.size() > 1 && s[1] == '=')
+      return createToken(Tok::Equal, 2);
+    return createToken(Tok::Assign, 1);
+  case '+':
+    if (s.size() > 1 && s[1] == '=')
+      return createToken(Tok::PlusAssign, 2);
+    return createToken(Tok::Plus, 1);
+  case '-':
+    if (s.size() > 1 && s[1] == '=')
+      return createToken(Tok::MinusAssign, 2);
+    return createToken(Tok::Minus, 1);
+  case '<':
+    // Try the three-character "<<=" before the two-character forms.
+    if (s.size() > 2 && s[1] == s[0] && s[2] == '=')
+      return createToken(Tok::LeftShiftAssign, 3);
+    if (s.size() > 1) {
+      if (s[1] == '=')
+        return createToken(Tok::LessEqual, 2);
+      if (s[1] == '<')
+        return createToken(Tok::LeftShift, 2);
+    }
+    return createToken(Tok::Less, 1);
+  case '>':
+    // Try the three-character ">>=" before the two-character forms.
+    if (s.size() > 2 && s[1] == s[0] && s[2] == '=')
+      return createToken(Tok::RightShiftAssign, 3);
+    if (s.size() > 1) {
+      if (s[1] == '=')
+        return createToken(Tok::GreaterEqual, 2);
+      if (s[1] == '>')
+        return createToken(Tok::RightShift, 2);
+    }
+    return createToken(Tok::Greater, 1);
+  case '&':
+    if (s.size() > 1) {
+      if (s[1] == '=')
+        return createToken(Tok::AndAssign, 2);
+      if (s[1] == '&')
+        return createToken(Tok::LogicalAnd, 2);
+    }
+    return createToken(Tok::BitwiseAnd, 1);
+  case '^':
+    if (s.size() > 1 && s[1] == '=')
+      return createToken(Tok::XorAssign, 2);
+    return createToken(Tok::BitwiseXor, 1);
+  case '|':
+    if (s.size() > 1) {
+      if (s[1] == '=')
+        return createToken(Tok::OrAssign, 2);
+      if (s[1] == '|')
+        return createToken(Tok::LogicalOr, 2);
+    }
+    return createToken(Tok::BitwiseOr, 1);
+  case '.':
+    return createToken(Tok::Dot, 1);
+  case '_':
+    return createToken(Tok::Underscore, 1);
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
+    // Only the leading digit is taken; the rest of a number is not scanned.
+    return createToken(Tok::Decimal, 1);
+  default:
+    // An unrecognized character forms a single-character token. Returning all
+    // of `s` here would put the whole remaining input into one token.
+    return createToken(Tok::Identifier, 1);
+  }
+}
+
+const llvm::StringMap<Tok> ScriptLexer::keywordTokMap = { // Lookup table: keyword spelling -> Tok kind.
+ {"ENTRY", Tok::Entry}, // Keys are stored in the upper-case spelling listed here.
+ {"INPUT", Tok::Input}, // NOTE(review): presumably consulted by getKeywordorIdentifier, whose body is not shown here - confirm.
+ {"GROUP", Tok::Group},
+ {"INCLUDE", Tok::Include},
+ {"MEMORY", Tok::Memory},
+ {"OUTPUT", Tok::Output},
+ {"SEARCH_DIR", Tok::SearchDir},
+ {"STARTUP", Tok::Startup},
+ {"INSERT", Tok::Insert},
+ {"AFTER", Tok::After},
+ {"OUTPUT_FORMAT", Tok::OutputFormat},
+ {"TARGET", Tok::Target},
+ {"ASSERT", Tok::Assert},
+ {"CONSTANT", Tok::Constant},
+ {"EXTERN", Tok::Extern},
+ {"OUTPUT_ARCH", Tok::OutputArch},
+ {"NOCROSSREFS", Tok::Nocrossrefs},
+ {"NOCROSSREFS_TO", Tok::NocrossrefsTo},
+ {"PROVIDE", Tok::Provide},
+ {"HIDDEN", Tok::Hidden},
+ {"PROVIDE_HIDDEN", Tok::ProvideHidden},
+ {"SECTIONS", Tok::Sections},
+ {"BEFORE", Tok::Before},
+ {"EXCLUDE_FILE", Tok::ExcludeFile},
+ {"KEEP", Tok::Keep},
+ {"INPUT_SECTION_FLAGS", Tok::InputSectionFlags},
+ {"OVERLAY", Tok::Overlay},
+ {"NOLOAD", Tok::Noload},
+ {"COPY", Tok::Copy},
+ {"INFO", Tok::Info},
+ {"OVERWRITE_SECTIONS", Tok::OverwriteSections},
+ {"SUBALIGN", Tok::Subalign},
+ {"ONLY_IF_RO", Tok::OnlyIfRO},
+ {"ONLY_IF_RW", Tok::OnlyIfRW},
+ {"FILL", Tok::Fill},
+ {"SORT", Tok::Sort},
+ {"ABSOLUTE", Tok::Absolute},
+ {"ADDR", Tok::Addr},
+ {"ALIGN", Tok::Align},
+ {"ALIGNOF", Tok::Alignof},
+ {"DATA_SEGMENT_ALIGN", Tok::DataSegmentAlign},
+ {"DATA_SEGMENT_END", Tok::DataSegmentEnd},
+ {"DATA_SEGMENT_RELRO_END", Tok::DataSegmentRelroEnd},
+ {"DEFINED", Tok::Defined},
+ {"LENGTH", Tok::Length},
+ {"LOADADDR", Tok::Loadaddr},
+ {"LOG2CEIL", Tok::Log2ceil},
+ {"MAX", Tok::Max},
+ {"MIN", Tok::Min},
+ {"ORIGIN", Tok::Origin},
+ {"SEGMENT_START", Tok::SegmentStart},
+ {"SIZEOF", Tok::Sizeof},
+ {"SIZEOF_HEADERS", Tok::SizeofHeaders},
+ {"FILEHDR", Tok::Filehdr},
+ {"PHDRS", Tok::Phdrs},
+ {"AT", Tok::At},
+ {"FLAGS", Tok::Flags},
+ {"VERSION", Tok::Version},
+ {"REGION_ALIAS", Tok::RegionAlias},
+ {"AS_NEEDED", Tok::AsNeeded},
+ {"CONSTRUCTORS", Tok::Constructors},
+ {"MAXPAGESIZE", Tok::Maxpagesize},
+ {"COMMONPAGESIZE", Tok::Commonpagesize}}; // Last entry; the initializer list holds no duplicate keys.
+
+ScriptLexer::Token ScriptLexer::getKeywordorIdentifier(StringRef s) {
----------------
mysterymath wrote:
Please rename this to getKeywordOrIdentifier (capitalize the "O"), both here and at the call site in ScriptLexer::tokenize.
https://github.com/llvm/llvm-project/pull/100180
More information about the llvm-commits
mailing list