[llvm] 678211d - [ms] [llvm-ml] Standardize blocking of lexical substitution

Eric Astor via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 2 11:20:14 PDT 2021


Author: Eric Astor
Date: 2021-07-02T14:17:37-04:00
New Revision: 678211de6d5f75c22beb72ec1203b1e857ddebf3

URL: https://github.com/llvm/llvm-project/commit/678211de6d5f75c22beb72ec1203b1e857ddebf3
DIFF: https://github.com/llvm/llvm-project/commit/678211de6d5f75c22beb72ec1203b1e857ddebf3.diff

LOG: [ms] [llvm-ml] Standardize blocking of lexical substitution

In MASM, the ifdef family of directives treats its argument literally, without expanding it as a text macro. Add support for this, and also replace the special handling that was previously used for echo.

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D104196

Added: 
    

Modified: 
    llvm/lib/MC/MCParser/MasmParser.cpp
    llvm/test/tools/llvm-ml/command_line_defines.asm

Removed: 
    


################################################################################
diff  --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 6edcfe22a84ea..a91623770116a 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -492,7 +492,9 @@ class MasmParser : public MCAsmParser {
   bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override;
   bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override;
 
-  const AsmToken &Lex() override;
+  enum ExpandKind { ExpandMacros, DoNotExpandMacros };
+  const AsmToken &Lex(ExpandKind ExpandNextToken);
+  const AsmToken &Lex() override { return Lex(ExpandMacros); }
 
   void setParsingMSInlineAsm(bool V) override {
     ParsingMSInlineAsm = V;
@@ -535,7 +537,11 @@ class MasmParser : public MCAsmParser {
 
   /// Parse an identifier or string (as a quoted identifier)
   /// and set \p Res to the identifier contents.
-  bool parseIdentifier(StringRef &Res) override;
+  enum IdentifierPositionKind { StandardPosition, StartOfStatement };
+  bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
+  bool parseIdentifier(StringRef &Res) override {
+    return parseIdentifier(Res, StandardPosition);
+  }
   void eatToEndOfStatement() override;
 
   bool checkForValidSection() override;
@@ -543,6 +549,7 @@ class MasmParser : public MCAsmParser {
   /// }
 
 private:
+  bool expandMacros();
   const AsmToken peekTok(bool ShouldSkipSpace = true);
 
   bool parseStatement(ParseStatementInfo &Info,
@@ -1008,7 +1015,7 @@ class MasmParser : public MCAsmParser {
   bool parseDirectiveRadix(SMLoc DirectiveLoc);
 
   // "echo"
-  bool parseDirectiveEcho();
+  bool parseDirectiveEcho(SMLoc DirectiveLoc);
 
   void initializeDirectiveKindMap();
   void initializeCVDefRangeTypeMap();
@@ -1116,7 +1123,43 @@ void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
                   Loc.getPointer(), EndStatementAtEOF);
 }
 
-const AsmToken &MasmParser::Lex() {
+bool MasmParser::expandMacros() {
+  const AsmToken &Tok = getTok();
+
+  auto VarIt = Variables.find(Tok.getIdentifier().lower());
+  if (VarIt != Variables.end() && VarIt->second.IsText) {
+    std::unique_ptr<MemoryBuffer> Instantiation =
+        MemoryBuffer::getMemBufferCopy(VarIt->second.TextValue,
+                                       "<instantiation>");
+
+    // Jump to the macro instantiation and prime the lexer.
+    CurBuffer =
+        SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
+    Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
+                    /*EndStatementAtEOF=*/false);
+    EndStatementAtEOFStack.push_back(false);
+    Lexer.Lex();
+    return false;
+  }
+
+  const llvm::MCAsmMacro *M =
+      getContext().lookupMacro(Tok.getIdentifier().lower());
+  if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
+    // This is a macro function invocation; expand it in place.
+    const SMLoc MacroLoc = Tok.getLoc();
+    const StringRef MacroId = Tok.getIdentifier();
+    Lexer.Lex();
+    if (handleMacroInvocation(M, MacroLoc)) {
+      Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
+      Lexer.Lex();
+    }
+    return false;
+  }
+
+  return true;
+}
+
+const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
   if (Lexer.getTok().is(AsmToken::Error))
     Error(Lexer.getErrLoc(), Lexer.getErr());
 
@@ -1131,10 +1174,9 @@ const AsmToken &MasmParser::Lex() {
   const AsmToken *tok = &Lexer.Lex();
   bool StartOfStatement = Lexer.isAtStartOfStatement();
 
-  while (tok->is(AsmToken::Identifier)) {
+  while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
     if (StartOfStatement) {
       AsmToken NextTok;
-
       MutableArrayRef<AsmToken> Buf(NextTok);
       size_t ReadCount = Lexer.peekTokens(Buf);
       if (ReadCount && NextTok.is(AsmToken::Identifier) &&
@@ -1145,34 +1187,8 @@ const AsmToken &MasmParser::Lex() {
         break;
       }
     }
-    auto it = Variables.find(tok->getIdentifier().lower());
-    const llvm::MCAsmMacro *M =
-        getContext().lookupMacro(tok->getIdentifier().lower());
-    if (it != Variables.end() && it->second.IsText) {
-      // This is a textmacro; expand it in place.
-      std::unique_ptr<MemoryBuffer> Instantiation =
-          MemoryBuffer::getMemBufferCopy(it->second.TextValue,
-                                         "<instantiation>");
-
-      // Jump to the macro instantiation and prime the lexer.
-      CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation),
-                                            getTok().getEndLoc());
-      Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
-                      /*EndStatementAtEOF=*/false);
-      EndStatementAtEOFStack.push_back(false);
-      tok = &Lexer.Lex();
-    } else if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
-      // This is a macro function invocation; expand it in place.
-      const AsmToken MacroTok = *tok;
-      tok = &Lexer.Lex();
-      if (handleMacroInvocation(M, MacroTok.getLoc())) {
-        Lexer.UnLex(AsmToken(AsmToken::Error, MacroTok.getIdentifier()));
-        tok = &Lexer.Lex();
-      }
-      continue;
-    } else {
+    if (expandMacros())
       break;
-    }
   }
 
   // Parse comments here to be deferred until end of next statement.
@@ -2086,12 +2102,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
     Lex(); // always eat a token
     if (!IDVal.startswith("."))
       return Error(IDLoc, "unexpected token at start of statement");
-  } else if (Lexer.is(AsmToken::Identifier) &&
-             getTok().getString().equals_insensitive("echo")) {
-    // Intercept echo early to avoid lexical substitution in its message, and
-    // delegate all handling to the appropriate function.
-    return parseDirectiveEcho();
-  } else if (parseIdentifier(IDVal)) {
+  } else if (parseIdentifier(IDVal, StartOfStatement)) {
     if (!TheCondState.Ignore) {
       Lex(); // always eat a token
       return Error(IDLoc, "unexpected token at start of statement");
@@ -2472,6 +2483,8 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
       return parseDirectiveErrorIfe(IDLoc, false);
     case DK_RADIX:
       return parseDirectiveRadix(IDLoc);
+    case DK_ECHO:
+      return parseDirectiveEcho(IDLoc);
     }
 
     return Error(IDLoc, "unknown directive");
@@ -3307,7 +3320,8 @@ bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
 /// parseIdentifier:
 ///   ::= identifier
 ///   ::= string
-bool MasmParser::parseIdentifier(StringRef &Res) {
+bool MasmParser::parseIdentifier(StringRef &Res,
+                                 IdentifierPositionKind Position) {
   // The assembler has relaxed rules for accepting identifiers, in particular we
   // allow things like '.globl $foo' and '.def @feat.00', which would normally
   // be separate tokens. At this level, we have already lexed so we cannot
@@ -3341,7 +3355,17 @@ bool MasmParser::parseIdentifier(StringRef &Res) {
 
   Res = getTok().getIdentifier();
 
-  Lex(); // Consume the identifier token.
+  // Consume the identifier token - but if parsing certain directives, avoid
+  // lexical expansion of the next token.
+  ExpandKind ExpandNextToken = ExpandMacros;
+  if (Position == StartOfStatement &&
+      StringSwitch<bool>(Res)
+          .CaseLower("echo", true)
+          .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
+          .Default(false)) {
+    ExpandNextToken = DoNotExpandMacros;
+  }
+  Lex(ExpandNextToken);
 
   return false;
 }
@@ -7081,14 +7105,7 @@ bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
 
 /// parseDirectiveEcho
 ///   ::= "echo" message
-bool MasmParser::parseDirectiveEcho() {
-  // We're called before the directive is parsed, to avoid triggering lexical
-  // substitutions in the message. Assert that the next token is the directive,
-  // then eat it without using the Parser's Lex method.
-  assert(getTok().is(AsmToken::Identifier) &&
-         getTok().getString().equals_insensitive("echo"));
-  Lexer.Lex();
-
+bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) {
   std::string Message = parseStringTo(AsmToken::EndOfStatement);
   llvm::outs() << Message;
   if (!StringRef(Message).endswith("\n"))

diff  --git a/llvm/test/tools/llvm-ml/command_line_defines.asm b/llvm/test/tools/llvm-ml/command_line_defines.asm
index cce838803a163..9ae413f072321 100644
--- a/llvm/test/tools/llvm-ml/command_line_defines.asm
+++ b/llvm/test/tools/llvm-ml/command_line_defines.asm
@@ -1,4 +1,4 @@
-; RUN: llvm-ml -filetype=s %s /Fo - /DT1=test1 /D T2=test2 /Dtest5=def | FileCheck %s
+; RUN: llvm-ml -filetype=s %s /Fo - /DT1=test1 /D T2=test2 /Dtest5=def /Dtest6 | FileCheck %s
 
 .code
 
@@ -51,4 +51,13 @@ test5 textequ <redef>
 ; CHECK-NEXT: .byte 101
 ; CHECK-NEXT: .byte 102
 
+t6:
+ifdef test6
+  xor eax, eax
+endif
+  ret
+; CHECK-LABEL: t6:
+; CHECK: xor eax, eax
+; CHECK: ret
+
 end


        


More information about the llvm-commits mailing list