[llvm] 6ddd8c2 - [AsmParser][SystemZ][z/OS] Add support to AsmLexer to accept HLASM style integers

Anirudh Prasad via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 13 12:29:46 PDT 2021


Author: Anirudh Prasad
Date: 2021-04-13T15:29:37-04:00
New Revision: 6ddd8c28b787b50a37df84890563d46dbfc3a81c

URL: https://github.com/llvm/llvm-project/commit/6ddd8c28b787b50a37df84890563d46dbfc3a81c
DIFF: https://github.com/llvm/llvm-project/commit/6ddd8c28b787b50a37df84890563d46dbfc3a81c.diff

LOG: [AsmParser][SystemZ][z/OS] Add support to AsmLexer to accept HLASM style integers

- Add support for HLASM style integers. These are plain decimal integers matching [0-9]+; leading zeros are allowed and do not change the radix.
- HLASM does not support the additional prefixed integers such as `0b` and `0x`, octal integers, or Masm style integers.
- To achieve this, a field `LexHLASMIntegers` (similar to the `LexMasmIntegers` field) is introduced in `MCAsmLexer.h`, along with a corresponding setter.

Note: This field could also go into MCAsmInfo.h. I followed the precedent set by the `LexMasmIntegers` field.

Depends on https://reviews.llvm.org/D99286

Reviewed By: epastor

Differential Revision: https://reviews.llvm.org/D99374

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCParser/MCAsmLexer.h
    llvm/lib/MC/MCParser/AsmLexer.cpp
    llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index bbc890c5644a4..6a604014a8374 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -56,6 +56,7 @@ class MCAsmLexer {
   bool LexMotorolaIntegers = false;
   bool UseMasmDefaultRadix = false;
   unsigned DefaultRadix = 10;
+  bool LexHLASMIntegers = false;
   AsmCommentConsumer *CommentConsumer = nullptr;
 
   MCAsmLexer();
@@ -176,6 +177,9 @@ class MCAsmLexer {
   /// Set whether to lex Motorola-style integer literals, such as $deadbeef or
   /// %01010110.
   void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; }
+
+  /// Set whether to lex HLASM-flavour integers. For now this is only [0-9]+.
+  void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; }
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index ab105c6100572..5fe3be42c801b 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -456,26 +456,32 @@ AsmToken AsmLexer::LexDigit() {
   }
 
   // Decimal integer: [1-9][0-9]*
-  if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
+  // HLASM-flavour decimal integer: [0-9][0-9]*
+  // FIXME: Later on, support for fb for HLASM has to be added in
+  // as they probably would be needed for asm goto
+  if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') {
     unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
-    bool isHex = Radix == 16;
-    // Check for floating point literals.
-    if (!isHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
-      if (*CurPtr == '.')
-        ++CurPtr;
-      return LexFloatLiteral();
+
+    if (!LexHLASMIntegers) {
+      bool IsHex = Radix == 16;
+      // Check for floating point literals.
+      if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
+        if (*CurPtr == '.')
+          ++CurPtr;
+        return LexFloatLiteral();
+      }
     }
 
     StringRef Result(TokStart, CurPtr - TokStart);
 
     APInt Value(128, 0, true);
-    if (Result.getAsInteger(Radix, Value)) {
+    if (Result.getAsInteger(Radix, Value))
       return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
-    }
 
-    // The darwin/x86 (and x86-64) assembler accepts and ignores type
-    // suffices on integer literals.
-    SkipIgnoredIntegerSuffix(CurPtr);
+    if (!LexHLASMIntegers)
+      // The darwin/x86 (and x86-64) assembler accepts and ignores type
+      // suffices on integer literals.
+      SkipIgnoredIntegerSuffix(CurPtr);
 
     return intToken(Result, Value);
   }

diff  --git a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
index a1253eaff43d7..d7e90f1b9a249 100644
--- a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
+++ b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
@@ -109,6 +109,21 @@ class SystemZAsmLexerTest : public ::testing::Test {
       Lexer.Lex();
     }
   }
+
+  void lexAndCheckIntegerTokensAndValues(StringRef AsmStr,
+                                         SmallVector<int64_t> ExpectedValues) {
+    // Checks that the lexer yields Integer tokens with ExpectedValues, in order.
+    MCAsmLexer &Lexer = Parser->getLexer();
+    // Each expected value is compared against exactly one Integer token.
+    for (size_t I = 0; I < ExpectedValues.size(); ++I) {
+      // Consume EndOfStatement tokens so the lexer advances to the next value.
+      while (Lexer.getTok().getKind() == AsmToken::EndOfStatement)
+        Lexer.Lex();
+      ASSERT_EQ(Lexer.getTok().getKind(), AsmToken::Integer);
+      EXPECT_EQ(Lexer.getTok().getIntVal(), ExpectedValues[I]);
+      Lexer.Lex();
+    }
+  }
 };
 
 TEST_F(SystemZAsmLexerTest, CheckDontRestrictCommentStringToStartOfStatement) {
@@ -367,4 +382,76 @@ TEST_F(SystemZAsmLexerTest, CheckStrictCommentString5) {
 
   lexAndCheckTokens(AsmStr, ExpectedTokens);
 }
+
+TEST_F(SystemZAsmLexerTest, CheckValidHLASMIntegers) {
+  StringRef AsmStr = "123\n000123\n1999\n007\n12300\n12021\n";
+  // Leading zeros do not select octal: "000123" and "007" are decimal.
+  // Setup.
+  setupCallToAsmParser(AsmStr);
+  Parser->getLexer().setLexHLASMIntegers(true);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  // One expected value per line of AsmStr, in order.
+  SmallVector<int64_t> ExpectedValues({123, 123, 1999, 7, 12300, 12021});
+  lexAndCheckIntegerTokensAndValues(AsmStr, ExpectedValues);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckInvalidHLASMIntegers) {
+  StringRef AsmStr = "0b0101\n0xDEADBEEF\nfffh\n.133\n";
+
+  // Setup.
+  setupCallToAsmParser(AsmStr);
+  Parser->getLexer().setLexHLASMIntegers(true);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(AsmToken::Integer);        // "0"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "b0101"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n"
+  ExpectedTokens.push_back(AsmToken::Integer);        // "0"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "xDEADBEEF"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n"
+  ExpectedTokens.push_back(AsmToken::Identifier);     // "fffh"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n"
+  ExpectedTokens.push_back(AsmToken::Real);           // ".133"
+  ExpectedTokens.push_back(AsmToken::EndOfStatement); // "\n"
+  ExpectedTokens.push_back(AsmToken::Eof);
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckDefaultIntegers) {
+  StringRef AsmStr = "0b0101\n0xDEADBEEF\n017\n";
+
+  // Setup. HLASM-style integers stay off, so binary, hex and octal forms apply.
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<int64_t> ExpectedValues({5, 0xDEADBEEF, 15});
+  lexAndCheckIntegerTokensAndValues(AsmStr, ExpectedValues);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckDefaultFloats) {
+  StringRef AsmStr = "0.333\n1.3\n2.5\n3.0\n";
+
+  // Set up the parser over the input; HLASM-style integers are left disabled.
+  setupCallToAsmParser(AsmStr);
+
+  // Prime the lexer with the first token before checking.
+  Parser->getLexer().Lex();
+
+  // Expect a Real followed by an EndOfStatement for each of the four lines.
+  SmallVector<AsmToken::TokenKind> ExpectedTokens;
+  for (unsigned Line = 0; Line != 4; ++Line) {
+    ExpectedTokens.push_back(AsmToken::Real);
+    ExpectedTokens.push_back(AsmToken::EndOfStatement);
+  }
+  ExpectedTokens.push_back(AsmToken::Eof);
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
 } // end anonymous namespace


        


More information about the llvm-commits mailing list