[llvm] f7eec83 - [AsmParser][SystemZ][z/OS] Add in support to allow use of additional comment strings.

Anirudh Prasad via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 13 08:15:15 PDT 2021


Author: Anirudh Prasad
Date: 2021-04-13T11:15:09-04:00
New Revision: f7eec83932b541f6db295284d7c23731d8db9ce3

URL: https://github.com/llvm/llvm-project/commit/f7eec83932b541f6db295284d7c23731d8db9ce3
DIFF: https://github.com/llvm/llvm-project/commit/f7eec83932b541f6db295284d7c23731d8db9ce3.diff

LOG: [AsmParser][SystemZ][z/OS] Add in support to allow use of additional comment strings.

- Currently, MCAsmInfo provides a CommentString attribute that various targets can set, so that the AsmLexer can appropriately lex a string as a comment based on the value of that attribute.
- However, AsmLexer also supports a few additional comment syntaxes beyond what is specified by the CommentString attribute: regular C-style block comments (/* ... */), regular C-style line comments (// ...), and "#". While I'm not sure why this behaviour exists, I assume it is there to maintain backward compatibility with GNU AS (see https://sourceware.org/binutils/docs/as/Comments.html#Comments for reference).
For example:
Consider a target which sets the CommentString attribute to '*'.
The following strings are all lexed as comments.

```
"# abc" -> comment
"// abc" -> comment
"/* abc */" -> comment
"* abc" -> comment
```

- In HLASM however, only "*" is accepted as a comment string, and nothing else.
- To achieve this, an additional attribute (`AllowAdditionalComments`) has been added to MCAsmInfo. If this attribute is set to false, then only the string specified by the CommentString attribute is lexed as a comment by the AsmLexer; the regular C-style block comments, line comments and "#" are disabled. As a final note, "#" will still be treated as a comment if the CommentString attribute is set to "#".

Depends on https://reviews.llvm.org/D99277

Reviewed By: abhina.sreeskantharajan, myiwanch

Differential Revision: https://reviews.llvm.org/D99286

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCAsmInfo.h
    llvm/lib/MC/MCParser/AsmLexer.cpp
    llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
    llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h
index 216e01985ccd8..656cb29e2130b 100644
--- a/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/llvm/include/llvm/MC/MCAsmInfo.h
@@ -130,6 +130,14 @@ class MCAsmInfo {
   /// at the beginning of statements. Defaults to false.
   bool RestrictCommentStringToStartOfStatement = false;
 
+  /// This indicates whether to allow additional "comment strings" to be lexed
+  /// as a comment. Setting this attribute to true, will ensure that C-style
+  /// line comments (// ..), C-style block comments (/* .. */), and "#" are
+  /// all treated as comments in addition to the string specified by the
+  /// CommentString attribute.
+  /// Default is true.
+  bool AllowAdditionalComments = true;
+
   /// This is appended to emitted labels.  Defaults to ":"
   const char *LabelSuffix;
 
@@ -567,6 +575,7 @@ class MCAsmInfo {
   bool getRestrictCommentStringToStartOfStatement() const {
     return RestrictCommentStringToStartOfStatement;
   }
+  bool shouldAllowAdditionalComments() const { return AllowAdditionalComments; }
   const char *getLabelSuffix() const { return LabelSuffix; }
 
   bool useAssignmentForEHBegin() const { return UseAssignmentForEHBegin; }

diff  --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index f8e8eea9c2a05..ab105c6100572 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -175,7 +175,13 @@ AsmToken AsmLexer::LexIdentifier() {
 
 /// LexSlash: Slash: /
 ///           C-Style Comment: /* ... */
+///           C-style Comment: // ...
 AsmToken AsmLexer::LexSlash() {
+  if (!MAI.shouldAllowAdditionalComments()) {
+    IsAtStartOfStatement = false;
+    return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
+  }
+
   switch (*CurPtr) {
   case '*':
     IsAtStartOfStatement = false;
@@ -729,7 +735,9 @@ AsmToken AsmLexer::LexToken() {
       UnLex(TokenBuf[0]);
       return AsmToken(AsmToken::HashDirective, s);
     }
-    return LexLineComment();
+
+    if (MAI.shouldAllowAdditionalComments())
+      return LexLineComment();
   }
 
   if (isAtStartOfComment(TokStart))

diff  --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index 8c4567cd1c4ed..0c7a1338aaa2d 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -23,6 +23,7 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
 
   CommentString = AssemblerDialect == AD_HLASM ? "*" : "#";
   RestrictCommentStringToStartOfStatement = (AssemblerDialect == AD_HLASM);
+  AllowAdditionalComments = (AssemblerDialect == AD_ATT);
   ZeroDirective = "\t.space\t";
   Data64bitsDirective = "\t.quad\t";
   UsesELFSectionDirectiveForBSS = true;

diff  --git a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
index 8eea737886d63..a1253eaff43d7 100644
--- a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
+++ b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
@@ -32,6 +32,9 @@ class MockedUpMCAsmInfo : public MCAsmInfo {
     RestrictCommentStringToStartOfStatement = Value;
   }
   void setCommentString(StringRef Value) { CommentString = Value; }
+  void setAllowAdditionalComments(bool Value) {
+    AllowAdditionalComments = Value;
+  }
 };
 
 // Setup a testing class that the GTest framework can call.
@@ -213,4 +216,155 @@ TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier2) {
        AsmToken::EndOfStatement, AsmToken::Eof});
   lexAndCheckTokens(AsmStr, ExpectedTokens);
 }
+
+TEST_F(SystemZAsmLexerTest, DontCheckStrictCommentString) {
+  StringRef AsmStr = "# abc\n/* def *///  xyz";
+
+  // Setup.
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens(
+      {AsmToken::EndOfStatement, AsmToken::Comment, AsmToken::EndOfStatement,
+       AsmToken::Eof});
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, DontCheckStrictCommentString2) {
+  StringRef AsmStr = "# abc\n/* def *///  xyz\n* rst";
+
+  // Setup.
+  MUPMAI->setCommentString("*");
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens(
+      {AsmToken::EndOfStatement, AsmToken::Comment, AsmToken::EndOfStatement,
+       AsmToken::EndOfStatement, AsmToken::Eof});
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckStrictCommentString) {
+  StringRef AsmStr = "# abc\n/* def *///  xyz";
+
+  // Setup.
+  MUPMAI->setAllowAdditionalComments(false);
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  // "# abc" -> still treated as a comment, since CommentString
+  //            is set to "#"
+  SmallVector<AsmToken::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "# abc\n"
+  ExpectedTokens.push_back(AsmToken::Slash);          // "/"
+  ExpectedTokens.push_back(AsmToken::Star);           // "*"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "def"
+  ExpectedTokens.push_back(AsmToken::Star);           // "*"
+  ExpectedTokens.push_back(AsmToken::Slash);          // "/"
+  ExpectedTokens.push_back(AsmToken::Slash);          // "/"
+  ExpectedTokens.push_back(AsmToken::Slash);          // "/"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "xyz"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement);
+  ExpectedTokens.push_back(AsmToken::Eof);
+
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckStrictCommentString2) {
+  StringRef AsmStr = "// abc";
+
+  // Setup.
+  MUPMAI->setAllowAdditionalComments(false);
+  MUPMAI->setCommentString("//");
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  // "// abc" -> will still be treated as a comment because "//" is the
+  //             CommentString
+  SmallVector<AsmToken::TokenKind> ExpectedTokens(
+      {AsmToken::EndOfStatement, AsmToken::Eof});
+  lexAndCheckTokens(AsmStr /* "// abc" */, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckStrictCommentString3) {
+  StringRef AsmStr = "/* abc */";
+
+  // Setup.
+  MUPMAI->setAllowAdditionalComments(false);
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(AsmToken::Slash);
+  ExpectedTokens.push_back(AsmToken::Star);
+  ExpectedTokens.push_back(AsmToken::Identifier);
+  ExpectedTokens.push_back(AsmToken::Star);
+  ExpectedTokens.push_back(AsmToken::Slash);
+  ExpectedTokens.push_back(AsmToken::EndOfStatement);
+  ExpectedTokens.push_back(AsmToken::Eof);
+
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckStrictCommentString4) {
+  StringRef AsmStr = "# abc\n/* def *///  xyz";
+
+  // Setup.
+  MUPMAI->setCommentString("*");
+  MUPMAI->setAllowAdditionalComments(false);
+  MUPMAI->setRestrictCommentStringToStartOfStatement(true);
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(AsmToken::Hash);           // "#"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "abc"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n"
+  ExpectedTokens.push_back(AsmToken::Slash);          // "/"
+  ExpectedTokens.push_back(AsmToken::Star);           // "*"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "def"
+  ExpectedTokens.push_back(AsmToken::Star);           // "*"
+  ExpectedTokens.push_back(AsmToken::Slash);          // "/"
+  ExpectedTokens.push_back(AsmToken::Slash);          // "/"
+  ExpectedTokens.push_back(AsmToken::Slash);          // "/"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "xyz"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement);
+  ExpectedTokens.push_back(AsmToken::Eof);
+
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckStrictCommentString5) {
+  StringRef AsmStr = "#abc\n/* def */// xyz";
+
+  // Setup.
+  MUPMAI->setCommentString("*");
+  MUPMAI->setAllowAdditionalComments(false);
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(AsmToken::Hash);           // "#"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "abc"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n"
+  ExpectedTokens.push_back(AsmToken::Slash);          // "/"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "* def */// xyz"
+  ExpectedTokens.push_back(AsmToken::Eof);
+
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
 } // end anonymous namespace


        


More information about the llvm-commits mailing list