[llvm] 7b921a6 - [AsmParser][SystemZ][z/OS] Add in support to accept "#" as part of an Identifier token
Anirudh Prasad via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 1 08:24:51 PDT 2021
Author: Anirudh Prasad
Date: 2021-04-01T11:24:43-04:00
New Revision: 7b921a674756dacebbe0431211ce43edb3493230
URL: https://github.com/llvm/llvm-project/commit/7b921a674756dacebbe0431211ce43edb3493230
DIFF: https://github.com/llvm/llvm-project/commit/7b921a674756dacebbe0431211ce43edb3493230.diff
LOG: [AsmParser][SystemZ][z/OS] Add in support to accept "#" as part of an Identifier token
- This patch adds support for accepting the "#" character as part of an Identifier.
- This support is especially needed for the HLASM dialect, since "#" is treated as part of the valid "Alphabet" range.
- The way this is done is by making use of the previous precedent set by the `AllowAtInIdentifier` field in `MCAsmLexer.h`. A new field called `AllowHashInIdentifier` is introduced.
- The static function `IsIdentifierChar` is also updated to accept the `#` character if the `AllowHashInIdentifier` field is set to true.
Note: The field introduced in `MCAsmLexer.h` could very well be moved to `MCAsmInfo.h`. I'm not opposed to it. I decided to put it in `MCAsmLexer` since there seems to be some sort of precedent already with `AllowAtInIdentifier`.
Reviewed By: abhina.sreeskantharajan, nickdesaulniers, MaskRay
Differential Revision: https://reviews.llvm.org/D99277
Added:
Modified:
llvm/include/llvm/MC/MCParser/MCAsmLexer.h
llvm/lib/MC/MCParser/AsmLexer.cpp
llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index 21966d1c742dd..d6ef92ca0b852 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -48,6 +48,7 @@ class MCAsmLexer {
const char *TokStart = nullptr;
bool SkipSpace = true;
bool AllowAtInIdentifier;
+ bool AllowHashInIdentifier = false;
bool IsAtStartOfStatement = true;
bool LexMasmHexFloats = false;
bool LexMasmIntegers = false;
@@ -147,6 +148,8 @@ class MCAsmLexer {
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
+ void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
+
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
this->CommentConsumer = CommentConsumer;
}
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index dd481d46f7883..c4006f92b80eb 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -143,10 +143,10 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
}
-/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
-static bool IsIdentifierChar(char c, bool AllowAt) {
- return isAlnum(c) || c == '_' || c == '$' || c == '.' ||
- (c == '@' && AllowAt) || c == '?';
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@#?]*
+static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) {
+ return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||
+ (AllowAt && C == '@') || (AllowHash && C == '#');
}
AsmToken AsmLexer::LexIdentifier() {
@@ -156,12 +156,13 @@ AsmToken AsmLexer::LexIdentifier() {
while (isDigit(*CurPtr))
++CurPtr;
- if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) ||
+ if (!isIdentifierChar(*CurPtr, AllowAtInIdentifier,
+ AllowHashInIdentifier) ||
*CurPtr == 'e' || *CurPtr == 'E')
return LexFloatLiteral();
}
- while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
+ while (isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier))
++CurPtr;
// Handle . as a special case.
@@ -726,9 +727,10 @@ AsmToken AsmLexer::LexToken() {
switch (CurChar) {
default:
if (MAI.doesAllowSymbolAtNameStart()) {
- // Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@?]*
+ // Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
if (!isDigit(CurChar) &&
- IsIdentifierChar(CurChar, MAI.doesAllowAtInName()))
+ isIdentifierChar(CurChar, MAI.doesAllowAtInName(),
+ AllowHashInIdentifier))
return LexIdentifier();
} else {
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
diff --git a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
index 183b3ffc9352a..8eea737886d63 100644
--- a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
+++ b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
@@ -94,8 +94,6 @@ class SystemZAsmLexerTest : public ::testing::Test {
Str.reset(TheTarget->createNullStreamer(*Ctx));
Parser.reset(createMCAsmParser(SrcMgr, *Ctx, *Str, *MUPMAI));
- // Lex initially to get the string.
- Parser->getLexer().Lex();
}
void lexAndCheckTokens(StringRef AsmStr,
@@ -116,6 +114,9 @@ TEST_F(SystemZAsmLexerTest, CheckDontRestrictCommentStringToStartOfStatement) {
// Setup.
setupCallToAsmParser(AsmStr);
+ // Lex initially to get the string.
+ Parser->getLexer().Lex();
+
SmallVector<AsmToken::TokenKind> ExpectedTokens(
{AsmToken::Identifier, AsmToken::EndOfStatement});
lexAndCheckTokens(AsmStr /* "jne #-4" */, ExpectedTokens);
@@ -129,6 +130,9 @@ TEST_F(SystemZAsmLexerTest, CheckRestrictCommentStringToStartOfStatement) {
MUPMAI->setRestrictCommentStringToStartOfStatement(true);
setupCallToAsmParser(AsmStr);
+ // Lex initially to get the string.
+ Parser->getLexer().Lex();
+
// When we are restricting the comment string to only the start of the
// statement, The sequence of tokens we are expecting are: Identifier - "jne"
// Hash - '#'
@@ -148,8 +152,65 @@ TEST_F(SystemZAsmLexerTest, CheckHLASMComment) {
MUPMAI->setCommentString("*");
setupCallToAsmParser(AsmStr);
+ // Lex initially to get the string.
+ Parser->getLexer().Lex();
+
SmallVector<AsmToken::TokenKind> ExpectedTokens(
{AsmToken::EndOfStatement, AsmToken::Eof});
lexAndCheckTokens(AsmStr /* "* lhi 1,10" */, ExpectedTokens);
}
+
+TEST_F(SystemZAsmLexerTest, CheckHashDefault) {
+ StringRef AsmStr = "lh#123";
+
+ // Setup.
+ setupCallToAsmParser(AsmStr);
+
+ // Lex initially to get the string.
+ Parser->getLexer().Lex();
+
+ // "lh" -> Identifier
+ // "#123" -> EndOfStatement (Lexed as a comment since CommentString is "#")
+ SmallVector<AsmToken::TokenKind> ExpectedTokens(
+ {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
+ lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+// Test if "#" is accepted as part of an Identifier
+TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier) {
+ StringRef AsmStr = "lh#123";
+
+ // Setup.
+ setupCallToAsmParser(AsmStr);
+ Parser->getLexer().setAllowHashInIdentifier(true);
+
+ // Lex initially to get the string.
+ Parser->getLexer().Lex();
+
+ // "lh#123" -> Identifier
+ SmallVector<AsmToken::TokenKind> ExpectedTokens(
+ {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
+ lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier2) {
+ StringRef AsmStr = "lh#12*3";
+
+ // Setup.
+ MUPMAI->setCommentString("*");
+ MUPMAI->setRestrictCommentStringToStartOfStatement(true);
+ setupCallToAsmParser(AsmStr);
+ Parser->getLexer().setAllowHashInIdentifier(true);
+
+ // Lex initially to get the string.
+ Parser->getLexer().Lex();
+
+ // "lh#12" -> Identifier
+ // "*" -> Star
+ // "3" -> Integer
+ SmallVector<AsmToken::TokenKind> ExpectedTokens(
+ {AsmToken::Identifier, AsmToken::Star, AsmToken::Integer,
+ AsmToken::EndOfStatement, AsmToken::Eof});
+ lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
} // end anonymous namespace
More information about the llvm-commits
mailing list