[llvm] r345189 - [MC] Separate masm integer literal lexer support from inline asm

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 24 13:23:57 PDT 2018


Author: rnk
Date: Wed Oct 24 13:23:57 2018
New Revision: 345189

URL: http://llvm.org/viewvc/llvm-project?rev=345189&view=rev
Log:
[MC] Separate masm integer literal lexer support from inline asm

Summary:
This renames the IsParsingMSInlineAsm member variable of AsmLexer to
LexMasmIntegers and moves it up to MCAsmLexer. Lexing masm-style integer
literals is the only behavior controlled by that variable. I added a
public setter, so that it can be set from outside or from the llvm-mc
command line. We may need to arrange things so that users can get this
behavior from clang, but that's future work.

I also put additional hex literal lexing functionality under this flag
to fix PR32973. It appears that this hex literal parsing wasn't intended
to be enabled in non-masm-style blocks.

Now, masm integers (0b1101 and 0ABCh) work in __asm blocks from clang,
while 0b label references (as in "jmp 0b") work when using .intel_syntax
in standalone .s files.

However, 0b label references will *not* work from __asm blocks in clang.
They will work from GCC inline asm blocks, which appears to be important
for Crypto++, as mentioned in PR36144.

Essentially, we only lex masm literals for inline asm blobs that use
intel syntax. If the .intel_syntax directive is used inside a gnu-style
inline asm statement, masm literals will not be lexed, which is
compatible with gas and llvm-mc standalone .s assembly.

This fixes PR36144 and PR32973.

Reviewers: Gerolf, avt77

Subscribers: eraman, hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D53535

Added:
    llvm/trunk/test/MC/AArch64/macro-hex-int.s
Modified:
    llvm/trunk/include/llvm/MC/MCParser/AsmLexer.h
    llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h
    llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
    llvm/trunk/lib/MC/MCParser/AsmLexer.cpp
    llvm/trunk/lib/MC/MCParser/AsmParser.cpp
    llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
    llvm/trunk/test/MC/X86/intel-syntax-hex.s
    llvm/trunk/test/MC/X86/pr27884.s
    llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s
    llvm/trunk/tools/llvm-mc/llvm-mc.cpp

Modified: llvm/trunk/include/llvm/MC/MCParser/AsmLexer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCParser/AsmLexer.h?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCParser/AsmLexer.h (original)
+++ llvm/trunk/include/llvm/MC/MCParser/AsmLexer.h Wed Oct 24 13:23:57 2018
@@ -30,7 +30,6 @@ class AsmLexer : public MCAsmLexer {
   StringRef CurBuf;
   bool IsAtStartOfLine = true;
   bool IsAtStartOfStatement = true;
-  bool IsParsingMSInlineAsm = false;
   bool IsPeeking = false;
 
 protected:
@@ -44,7 +43,6 @@ public:
   ~AsmLexer() override;
 
   void setBuffer(StringRef Buf, const char *ptr = nullptr);
-  void setParsingMSInlineAsm(bool V) { IsParsingMSInlineAsm = V; }
 
   StringRef LexUntilEndOfStatement() override;
 

Modified: llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h (original)
+++ llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h Wed Oct 24 13:23:57 2018
@@ -50,6 +50,7 @@ protected: // Can only create subclasses
   bool SkipSpace = true;
   bool AllowAtInIdentifier;
   bool IsAtStartOfStatement = true;
+  bool LexMasmIntegers = false;
   AsmCommentConsumer *CommentConsumer = nullptr;
 
   MCAsmLexer();
@@ -146,6 +147,10 @@ public:
   void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
     this->CommentConsumer = CommentConsumer;
   }
+
+  /// Set whether to lex masm-style binary and hex literals. They look like
+  /// 0b1101 and 0ABCh respectively.
+  void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
 };
 
 } // end namespace llvm

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Wed Oct 24 13:23:57 2018
@@ -156,9 +156,10 @@ void AsmPrinter::EmitInlineAsm(StringRef
   Parser->setAssemblerDialect(Dialect);
   Parser->setTargetParser(*TAP.get());
   Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
+  // Enable lexing Masm binary and hex integer literals in intel inline
+  // assembly.
   if (Dialect == InlineAsm::AD_Intel)
-    // We need this flag to be able to parse numbers like "0bH"
-    Parser->setParsingInlineAsm(true);
+    Parser->getLexer().setLexMasmIntegers(true);
   if (MF) {
     const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
     TAP->SetFrameRegister(TRI->getFrameRegister(*MF));

Modified: llvm/trunk/lib/MC/MCParser/AsmLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmLexer.cpp?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCParser/AsmLexer.cpp (original)
+++ llvm/trunk/lib/MC/MCParser/AsmLexer.cpp Wed Oct 24 13:23:57 2018
@@ -243,22 +243,26 @@ static void SkipIgnoredIntegerSuffix(con
 
 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
 // integer as a hexadecimal, possibly with leading zeroes.
-static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
-  const char *FirstHex = nullptr;
+static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
+                               bool LexHex) {
+  const char *FirstNonDec = nullptr;
   const char *LookAhead = CurPtr;
   while (true) {
     if (isDigit(*LookAhead)) {
       ++LookAhead;
-    } else if (isHexDigit(*LookAhead)) {
-      if (!FirstHex)
-        FirstHex = LookAhead;
-      ++LookAhead;
     } else {
-      break;
+      if (!FirstNonDec)
+        FirstNonDec = LookAhead;
+
+      // Keep going if we are looking for a 'h' suffix.
+      if (LexHex && isHexDigit(*LookAhead))
+        ++LookAhead;
+      else
+        break;
     }
   }
-  bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
-  CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
+  bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
+  CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
   if (isHex)
     return 16;
   return DefaultRadix;
@@ -281,7 +285,7 @@ static AsmToken intToken(StringRef Ref,
 AsmToken AsmLexer::LexDigit() {
   // MASM-flavor binary integer: [01]+[bB]
   // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
-  if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
+  if (LexMasmIntegers && isdigit(CurPtr[-1])) {
     const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
                                    CurPtr - 1 : nullptr;
     const char *OldCurPtr = CurPtr;
@@ -320,7 +324,7 @@ AsmToken AsmLexer::LexDigit() {
 
   // Decimal integer: [1-9][0-9]*
   if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
-    unsigned Radix = doLookAhead(CurPtr, 10);
+    unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
     bool isHex = Radix == 16;
     // Check for floating point literals.
     if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
@@ -335,8 +339,8 @@ AsmToken AsmLexer::LexDigit() {
       return ReturnError(TokStart, !isHex ? "invalid decimal number" :
                            "invalid hexdecimal number");
 
-    // Consume the [bB][hH].
-    if (Radix == 2 || Radix == 16)
+    // Consume the [hH].
+    if (LexMasmIntegers && Radix == 16)
       ++CurPtr;
 
     // The darwin/x86 (and x86-64) assembler accepts and ignores type
@@ -346,7 +350,7 @@ AsmToken AsmLexer::LexDigit() {
     return intToken(Result, Value);
   }
 
-  if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
+  if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
     ++CurPtr;
     // See if we actually have "0b" as part of something like "jmp 0b\n"
     if (!isDigit(CurPtr[0])) {
@@ -395,7 +399,7 @@ AsmToken AsmLexer::LexDigit() {
       return ReturnError(TokStart, "invalid hexadecimal number");
 
     // Consume the optional [hH].
-    if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
+    if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
       ++CurPtr;
 
     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
@@ -407,7 +411,7 @@ AsmToken AsmLexer::LexDigit() {
 
   // Either octal or hexadecimal.
   APInt Value(128, 0, true);
-  unsigned Radix = doLookAhead(CurPtr, 8);
+  unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
   bool isHex = Radix == 16;
   StringRef Result(TokStart, CurPtr - TokStart);
   if (Result.getAsInteger(Radix, Value))

Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmParser.cpp?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCParser/AsmParser.cpp (original)
+++ llvm/trunk/lib/MC/MCParser/AsmParser.cpp Wed Oct 24 13:23:57 2018
@@ -229,7 +229,9 @@ public:
 
   void setParsingInlineAsm(bool V) override {
     ParsingInlineAsm = V;
-    Lexer.setParsingMSInlineAsm(V);
+    // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
+    // hex integer literals.
+    Lexer.setLexMasmIntegers(V);
   }
   bool isParsingInlineAsm() override { return ParsingInlineAsm; }
 

Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original)
+++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Wed Oct 24 13:23:57 2018
@@ -3283,7 +3283,6 @@ bool X86AsmParser::ParseDirective(AsmTok
   if (IDVal.startswith(".code"))
     return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
   else if (IDVal.startswith(".att_syntax")) {
-    getParser().setParsingInlineAsm(false);
     if (getLexer().isNot(AsmToken::EndOfStatement)) {
       if (Parser.getTok().getString() == "prefix")
         Parser.Lex();
@@ -3296,7 +3295,6 @@ bool X86AsmParser::ParseDirective(AsmTok
     return false;
   } else if (IDVal.startswith(".intel_syntax")) {
     getParser().setAssemblerDialect(1);
-    getParser().setParsingInlineAsm(true);
     if (getLexer().isNot(AsmToken::EndOfStatement)) {
       if (Parser.getTok().getString() == "noprefix")
         Parser.Lex();

Added: llvm/trunk/test/MC/AArch64/macro-hex-int.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AArch64/macro-hex-int.s?rev=345189&view=auto
==============================================================================
--- llvm/trunk/test/MC/AArch64/macro-hex-int.s (added)
+++ llvm/trunk/test/MC/AArch64/macro-hex-int.s Wed Oct 24 13:23:57 2018
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -triple aarch64-elf -filetype=obj %s -o - | llvm-objdump -d -r - | FileCheck %s
+
+.macro do_add sz
+        add     v0.\sz, v0.\sz, v0.\sz
+.endm
+
+do_add 8h
+// CHECK:  add     v0.8h, v0.8h, v0.8h

Modified: llvm/trunk/test/MC/X86/intel-syntax-hex.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/intel-syntax-hex.s?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/intel-syntax-hex.s (original)
+++ llvm/trunk/test/MC/X86/intel-syntax-hex.s Wed Oct 24 13:23:57 2018
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
+// RUN: llvm-mc -masm-integers -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
 // rdar://12470373
 
 // Checks to make sure we parse the hexadecimal suffix properly.

Modified: llvm/trunk/test/MC/X86/pr27884.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/pr27884.s?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/pr27884.s (original)
+++ llvm/trunk/test/MC/X86/pr27884.s Wed Oct 24 13:23:57 2018
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown %s -masm-integers=1
 
 .intel_syntax
 add rbx, 0B0h

Modified: llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s Wed Oct 24 13:23:57 2018
@@ -5,7 +5,7 @@
 
   .intel_syntax noprefix
   mov	eax, 1
-  mov	ebx, 0ffh
+  mov	ebx, 0xff
   imul	esi, edi
   lea	eax, [rsi + rdi]
 

Modified: llvm/trunk/tools/llvm-mc/llvm-mc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/llvm-mc.cpp?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mc/llvm-mc.cpp (original)
+++ llvm/trunk/tools/llvm-mc/llvm-mc.cpp Wed Oct 24 13:23:57 2018
@@ -164,6 +164,10 @@ MainFileName("main-file-name",
 static cl::opt<bool> SaveTempLabels("save-temp-labels",
                                     cl::desc("Don't discard temporary labels"));
 
+static cl::opt<bool> LexMasmIntegers(
+    "masm-integers",
+    cl::desc("Enable binary and hex masm integers (0b110 and 0ABCh)"));
+
 static cl::opt<bool> NoExecStack("no-exec-stack",
                                  cl::desc("File doesn't need an exec stack"));
 
@@ -293,6 +297,7 @@ static int AssembleInput(const char *Pro
     return SymbolResult;
   Parser->setShowParsedOperands(ShowInstOperands);
   Parser->setTargetParser(*TAP);
+  Parser->getLexer().setLexMasmIntegers(LexMasmIntegers);
 
   int Res = Parser->Run(NoInitialTextSection);
 




More information about the llvm-commits mailing list