[llvm] 682fe17 - [MC/AsmLexer] Add '?' (Question) token

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 13 17:46:22 PDT 2023


Author: Sergei Barannikov
Date: 2023-07-14T03:46:13+03:00
New Revision: 682fe17e08c52bbdd2c2894ed08a3f9445660cbe

URL: https://github.com/llvm/llvm-project/commit/682fe17e08c52bbdd2c2894ed08a3f9445660cbe
DIFF: https://github.com/llvm/llvm-project/commit/682fe17e08c52bbdd2c2894ed08a3f9445660cbe.diff

LOG: [MC/AsmLexer] Add '?' (Question) token

'?' is a valid token in our downstream target. There seems to be no way
to do target-specific lexing, so just make AsmParser recognize it.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D154202

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCAsmMacro.h
    llvm/lib/MC/MCParser/AsmLexer.cpp
    llvm/lib/MC/MCParser/MCAsmLexer.cpp
    llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCAsmMacro.h b/llvm/include/llvm/MC/MCAsmMacro.h
index 3e8d898af308ab..e2989c09017a42 100644
--- a/llvm/include/llvm/MC/MCAsmMacro.h
+++ b/llvm/include/llvm/MC/MCAsmMacro.h
@@ -46,7 +46,7 @@ class AsmToken {
     Slash,     // '/'
     BackSlash, // '\'
     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
-    Star, Dot, Comma, Dollar, Equal, EqualEqual,
+    Question, Star, Dot, Comma, Dollar, Equal, EqualEqual,
 
     Pipe, PipePipe, Caret,
     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,

diff  --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 1b20b2b6eb2834..f13549b24e2dd2 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -776,9 +776,11 @@ AsmToken AsmLexer::LexToken() {
   IsAtStartOfStatement = false;
   switch (CurChar) {
   default:
-    // Handle identifier: [a-zA-Z_.?][a-zA-Z0-9_$.@#?]*
-    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.' ||
-        (MAI.doesAllowQuestionAtStartOfIdentifier() && CurChar == '?'))
+    // Handle identifier: [a-zA-Z_.$@#?][a-zA-Z0-9_.$@#?]*
+    // Whether or not the lexer accepts '$', '@', '#' and '?' at the start of
+    // an identifier is target-dependent. These characters are handled in the
+    // respective switch cases.
+    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
       return LexIdentifier();
 
     // Unknown character, emit an error.
@@ -830,11 +832,18 @@ AsmToken AsmLexer::LexToken() {
       return LexIdentifier();
     return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
   }
-  case '@': {
+  case '@':
     if (MAI.doesAllowAtAtStartOfIdentifier())
       return LexIdentifier();
     return AsmToken(AsmToken::At, StringRef(TokStart, 1));
-  }
+  case '#':
+    if (MAI.doesAllowHashAtStartOfIdentifier())
+      return LexIdentifier();
+    return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+  case '?':
+    if (MAI.doesAllowQuestionAtStartOfIdentifier())
+      return LexIdentifier();
+    return AsmToken(AsmToken::Question, StringRef(TokStart, 1));
   case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
   case '=':
     if (*CurPtr == '=') {
@@ -914,11 +923,6 @@ AsmToken AsmLexer::LexToken() {
   case '/':
     IsAtStartOfStatement = OldIsAtStartOfStatement;
     return LexSlash();
-  case '#': {
-    if (MAI.doesAllowHashAtStartOfIdentifier())
-      return LexIdentifier();
-    return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
-  }
   case '\'': return LexSingleQuote();
   case '"': return LexQuote();
   case '0': case '1': case '2': case '3': case '4':

diff  --git a/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/llvm/lib/MC/MCParser/MCAsmLexer.cpp
index 632c52479d705d..f202b53732fc77 100644
--- a/llvm/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -88,6 +88,7 @@ void AsmToken::dump(raw_ostream &OS) const {
   case AsmToken::Pipe:               OS << "Pipe"; break;
   case AsmToken::PipePipe:           OS << "PipePipe"; break;
   case AsmToken::Plus:               OS << "Plus"; break;
+  case AsmToken::Question:           OS << "Question"; break;
   case AsmToken::RBrac:              OS << "RBrac"; break;
   case AsmToken::RCurly:             OS << "RCurly"; break;
   case AsmToken::RParen:             OS << "RParen"; break;

diff  --git a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
index cd0e1b40d5002b..53cb265a4162c2 100644
--- a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
+++ b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
@@ -358,7 +358,7 @@ TEST_F(SystemZAsmLexerLinux, CheckDefaultQuestionAtStartOfIdentifier) {
   Parser->getLexer().Lex();
 
   SmallVector<AsmToken::TokenKind> ExpectedTokens(
-      {AsmToken::Error, AsmToken::Identifier, AsmToken::EndOfStatement,
+      {AsmToken::Question, AsmToken::Identifier, AsmToken::EndOfStatement,
        AsmToken::Eof});
   lexAndCheckTokens(AsmStr, ExpectedTokens);
 }


        


More information about the llvm-commits mailing list