[llvm] 682fe17 - [MC/AsmLexer] Add '?' (Question) token
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 13 17:46:22 PDT 2023
Author: Sergei Barannikov
Date: 2023-07-14T03:46:13+03:00
New Revision: 682fe17e08c52bbdd2c2894ed08a3f9445660cbe
URL: https://github.com/llvm/llvm-project/commit/682fe17e08c52bbdd2c2894ed08a3f9445660cbe
DIFF: https://github.com/llvm/llvm-project/commit/682fe17e08c52bbdd2c2894ed08a3f9445660cbe.diff
LOG: [MC/AsmLexer] Add '?' (Question) token
'?' is a valid token in our downstream target. There seems to be no way
to do target-specific lexing, so just make AsmParser recognize it.
Reviewed By: MaskRay
Differential Revision: https://reviews.llvm.org/D154202
Added:
Modified:
llvm/include/llvm/MC/MCAsmMacro.h
llvm/lib/MC/MCParser/AsmLexer.cpp
llvm/lib/MC/MCParser/MCAsmLexer.cpp
llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCAsmMacro.h b/llvm/include/llvm/MC/MCAsmMacro.h
index 3e8d898af308ab..e2989c09017a42 100644
--- a/llvm/include/llvm/MC/MCAsmMacro.h
+++ b/llvm/include/llvm/MC/MCAsmMacro.h
@@ -46,7 +46,7 @@ class AsmToken {
Slash, // '/'
BackSlash, // '\'
LParen, RParen, LBrac, RBrac, LCurly, RCurly,
- Star, Dot, Comma, Dollar, Equal, EqualEqual,
+ Question, Star, Dot, Comma, Dollar, Equal, EqualEqual,
Pipe, PipePipe, Caret,
Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 1b20b2b6eb2834..f13549b24e2dd2 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -776,9 +776,11 @@ AsmToken AsmLexer::LexToken() {
IsAtStartOfStatement = false;
switch (CurChar) {
default:
- // Handle identifier: [a-zA-Z_.?][a-zA-Z0-9_$.@#?]*
- if (isalpha(CurChar) || CurChar == '_' || CurChar == '.' ||
- (MAI.doesAllowQuestionAtStartOfIdentifier() && CurChar == '?'))
+ // Handle identifier: [a-zA-Z_.$@#?][a-zA-Z0-9_.$@#?]*
+ // Whether or not the lexer accepts '$', '@', '#' and '?' at the start of
+ // an identifier is target-dependent. These characters are handled in the
+ // respective switch cases.
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
return LexIdentifier();
// Unknown character, emit an error.
@@ -830,11 +832,18 @@ AsmToken AsmLexer::LexToken() {
return LexIdentifier();
return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
}
- case '@': {
+ case '@':
if (MAI.doesAllowAtAtStartOfIdentifier())
return LexIdentifier();
return AsmToken(AsmToken::At, StringRef(TokStart, 1));
- }
+ case '#':
+ if (MAI.doesAllowHashAtStartOfIdentifier())
+ return LexIdentifier();
+ return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ case '?':
+ if (MAI.doesAllowQuestionAtStartOfIdentifier())
+ return LexIdentifier();
+ return AsmToken(AsmToken::Question, StringRef(TokStart, 1));
case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
case '=':
if (*CurPtr == '=') {
@@ -914,11 +923,6 @@ AsmToken AsmLexer::LexToken() {
case '/':
IsAtStartOfStatement = OldIsAtStartOfStatement;
return LexSlash();
- case '#': {
- if (MAI.doesAllowHashAtStartOfIdentifier())
- return LexIdentifier();
- return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
- }
case '\'': return LexSingleQuote();
case '"': return LexQuote();
case '0': case '1': case '2': case '3': case '4':
diff --git a/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/llvm/lib/MC/MCParser/MCAsmLexer.cpp
index 632c52479d705d..f202b53732fc77 100644
--- a/llvm/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -88,6 +88,7 @@ void AsmToken::dump(raw_ostream &OS) const {
case AsmToken::Pipe: OS << "Pipe"; break;
case AsmToken::PipePipe: OS << "PipePipe"; break;
case AsmToken::Plus: OS << "Plus"; break;
+ case AsmToken::Question: OS << "Question"; break;
case AsmToken::RBrac: OS << "RBrac"; break;
case AsmToken::RCurly: OS << "RCurly"; break;
case AsmToken::RParen: OS << "RParen"; break;
diff --git a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
index cd0e1b40d5002b..53cb265a4162c2 100644
--- a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
+++ b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
@@ -358,7 +358,7 @@ TEST_F(SystemZAsmLexerLinux, CheckDefaultQuestionAtStartOfIdentifier) {
Parser->getLexer().Lex();
SmallVector<AsmToken::TokenKind> ExpectedTokens(
- {AsmToken::Error, AsmToken::Identifier, AsmToken::EndOfStatement,
+ {AsmToken::Question, AsmToken::Identifier, AsmToken::EndOfStatement,
AsmToken::Eof});
lexAndCheckTokens(AsmStr, ExpectedTokens);
}
More information about the llvm-commits
mailing list