[llvm-commits] [llvm] r164248 - in /llvm/trunk: include/llvm/MC/MCParser/MCAsmLexer.h lib/MC/MCParser/AsmLexer.cpp lib/MC/MCParser/AsmParser.cpp lib/MC/MCParser/MCAsmLexer.cpp test/MC/AsmParser/macros-darwin.s test/MC/AsmParser/macros.s

Preston Gurd preston.gurd at intel.com
Wed Sep 19 13:36:12 PDT 2012


Author: pgurd
Date: Wed Sep 19 15:36:12 2012
New Revision: 164248

URL: http://llvm.org/viewvc/llvm-project?rev=164248&view=rev
Log:
Add support for macro parameters/arguments delimited by spaces,
to improve compatibility with GNU as.

Based on a patch by PaX Team.

Fixed assertion failures on non-Darwin and added additional test cases.


Added:
    llvm/trunk/test/MC/AsmParser/macros-darwin.s
Modified:
    llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h
    llvm/trunk/lib/MC/MCParser/AsmLexer.cpp
    llvm/trunk/lib/MC/MCParser/AsmParser.cpp
    llvm/trunk/lib/MC/MCParser/MCAsmLexer.cpp
    llvm/trunk/test/MC/AsmParser/macros.s

Modified: llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h?rev=164248&r1=164247&r2=164248&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h (original)
+++ llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h Wed Sep 19 15:36:12 2012
@@ -40,6 +40,7 @@
     // No-value.
     EndOfStatement,
     Colon,
+    Space,
     Plus, Minus, Tilde,
     Slash,    // '/'
     BackSlash, // '\'
@@ -126,6 +127,7 @@
   void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
 protected: // Can only create subclasses.
   const char *TokStart;
+  bool SkipSpace;
 
   MCAsmLexer();
 
@@ -175,6 +177,9 @@
 
   /// isNot - Check if the current token has kind \p K.
   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
+
+  /// setSkipSpace - Set whether spaces should be ignored by the lexer
+  void setSkipSpace(bool val) { SkipSpace = val; }
 };
 
 } // End llvm namespace

Modified: llvm/trunk/lib/MC/MCParser/AsmLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmLexer.cpp?rev=164248&r1=164247&r2=164248&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCParser/AsmLexer.cpp (original)
+++ llvm/trunk/lib/MC/MCParser/AsmLexer.cpp Wed Sep 19 15:36:12 2012
@@ -396,8 +396,17 @@
   case 0:
   case ' ':
   case '\t':
-    // Ignore whitespace.
-    return LexToken();
+    if (SkipSpace) {
+      // Ignore whitespace.
+      return LexToken();
+    } else {
+      int len = 1;
+      while (*CurPtr==' ' || *CurPtr=='\t') {
+        CurPtr++;
+        len++;
+      }
+      return AsmToken(AsmToken::Space, StringRef(TokStart, len));
+    }
   case '\n': // FALL THROUGH.
   case '\r':
     isAtStartOfLine = true;

Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmParser.cpp?rev=164248&r1=164247&r2=164248&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCParser/AsmParser.cpp (original)
+++ llvm/trunk/lib/MC/MCParser/AsmParser.cpp Wed Sep 19 15:36:12 2012
@@ -130,6 +130,9 @@
   /// AssemblerDialect. ~OU means unset value and use value provided by MAI.
   unsigned AssemblerDialect;
 
+  /// IsDarwin - is Darwin compatibility enabled?
+  bool IsDarwin;
+
 public:
   AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
             const MCAsmInfo &MAI);
@@ -209,7 +212,8 @@
 
   virtual void EatToEndOfStatement();
 
-  bool ParseMacroArgument(MacroArgument &MA);
+  bool ParseMacroArgument(MacroArgument &MA,
+                          AsmToken::TokenKind &ArgumentDelimiter);
   bool ParseMacroArguments(const Macro *M, MacroArguments &A);
 
   /// \brief Parse up to the end of statement and a return the contents from the
@@ -407,8 +411,8 @@
                      MCStreamer &_Out, const MCAsmInfo &_MAI)
   : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
     GenericParser(new GenericAsmParser), PlatformParser(0),
-    CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0), 
-    AssemblerDialect(~0U) {
+    CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0),
+    AssemblerDialect(~0U), IsDarwin(false) {
   // Save the old handler.
   SavedDiagHandler = SrcMgr.getDiagHandler();
   SavedDiagContext = SrcMgr.getDiagContext();
@@ -429,6 +433,7 @@
   } else if (_MAI.hasSubsectionsViaSymbols()) {
     PlatformParser = createDarwinAsmParser();
     PlatformParser->Initialize(*this);
+    IsDarwin = true;
   } else {
     PlatformParser = createELFAsmParser();
     PlatformParser->Initialize(*this);
@@ -1471,6 +1476,8 @@
   if (NParameters != 0 && NParameters != A.size())
     return Error(L, "Wrong number of arguments");
 
+  // A macro without parameters is handled differently on Darwin:
+  // gas accepts no arguments and does no substitutions
   while (!Body.empty()) {
     // Scan for the next substitution.
     std::size_t End = Body.size(), Pos = 0;
@@ -1537,15 +1544,23 @@
         if (Parameters[Index].first == Argument)
           break;
 
-      // FIXME: We should error at the macro definition.
-      if (Index == NParameters)
-        return Error(L, "Parameter not found");
-
-      for (MacroArgument::const_iterator it = A[Index].begin(),
-             ie = A[Index].end(); it != ie; ++it)
-        OS << it->getString();
+      if (Index == NParameters) {
+          if (Body[Pos+1] == '(' && Body[Pos+2] == ')')
+            Pos += 3;
+          else {
+            OS << '\\' << Argument;
+            Pos = I;
+          }
+      } else {
+        for (MacroArgument::const_iterator it = A[Index].begin(),
+               ie = A[Index].end(); it != ie; ++it)
+          if (it->getKind() == AsmToken::String)
+            OS << it->getStringContents();
+          else
+            OS << it->getString();
 
-      Pos += 1 + Argument.size();
+        Pos += 1 + Argument.size();
+      }
     }
     // Update the scan point.
     Body = Body.substr(Pos);
@@ -1560,22 +1575,97 @@
 {
 }
 
+static bool IsOperator(AsmToken::TokenKind kind)
+{
+  switch (kind)
+  {
+    default:
+      return false;
+    case AsmToken::Plus:
+    case AsmToken::Minus:
+    case AsmToken::Tilde:
+    case AsmToken::Slash:
+    case AsmToken::Star:
+    case AsmToken::Dot:
+    case AsmToken::Equal:
+    case AsmToken::EqualEqual:
+    case AsmToken::Pipe:
+    case AsmToken::PipePipe:
+    case AsmToken::Caret:
+    case AsmToken::Amp:
+    case AsmToken::AmpAmp:
+    case AsmToken::Exclaim:
+    case AsmToken::ExclaimEqual:
+    case AsmToken::Percent:
+    case AsmToken::Less:
+    case AsmToken::LessEqual:
+    case AsmToken::LessLess:
+    case AsmToken::LessGreater:
+    case AsmToken::Greater:
+    case AsmToken::GreaterEqual:
+    case AsmToken::GreaterGreater:
+      return true;
+  }
+}
+
 /// ParseMacroArgument - Extract AsmTokens for a macro argument.
 /// This is used for both default macro parameter values and the
 /// arguments in macro invocations
-bool AsmParser::ParseMacroArgument(MacroArgument &MA) {
+bool AsmParser::ParseMacroArgument(MacroArgument &MA,
+                                   AsmToken::TokenKind &ArgumentDelimiter) {
   unsigned ParenLevel = 0;
+  unsigned AddTokens = 0;
+
+  // gas accepts arguments separated by whitespace, except on Darwin
+  if (!IsDarwin)
+    Lexer.setSkipSpace(false);
 
   for (;;) {
-    if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
+    if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) {
+      Lexer.setSkipSpace(true);
       return TokError("unexpected token in macro instantiation");
+    }
+
+    if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
+      // Spaces and commas cannot be mixed to delimit parameters
+      if (ArgumentDelimiter == AsmToken::Eof)
+        ArgumentDelimiter = AsmToken::Comma;
+      else if (ArgumentDelimiter != AsmToken::Comma) {
+        Lexer.setSkipSpace(true);
+        return TokError("expected ' ' for macro argument separator");
+      }
+      break;
+    }
+
+    if (Lexer.is(AsmToken::Space)) {
+      Lex(); // Eat spaces
+
+      // Spaces can delimit parameters, but could also be part of an expression.
+      // If the token after a space is an operator, add the token and the next
+      // one into this argument
+      if (ArgumentDelimiter == AsmToken::Space ||
+          ArgumentDelimiter == AsmToken::Eof) {
+        if (IsOperator(Lexer.getKind())) {
+          // Check to see whether the token is used as an operator,
+          // or part of an identifier
+          const char *NextChar = getTok().getEndLoc().getPointer() + 1;
+          if (*NextChar == ' ')
+            AddTokens = 2;
+        }
+
+        if (!AddTokens && ParenLevel == 0) {
+          if (ArgumentDelimiter == AsmToken::Eof &&
+              !IsOperator(Lexer.getKind()))
+            ArgumentDelimiter = AsmToken::Space;
+          break;
+        }
+      }
+    }
 
     // HandleMacroEntry relies on not advancing the lexer here
     // to be able to fill in the remaining default parameter values
     if (Lexer.is(AsmToken::EndOfStatement))
       break;
-    if (ParenLevel == 0 && Lexer.is(AsmToken::Comma))
-      break;
 
     // Adjust the current parentheses level.
     if (Lexer.is(AsmToken::LParen))
@@ -1585,8 +1675,12 @@
 
     // Append the token to the current argument list.
     MA.push_back(getTok());
+    if (AddTokens)
+      AddTokens--;
     Lex();
   }
+
+  Lexer.setSkipSpace(true);
   if (ParenLevel != 0)
     return TokError("unbalanced parentheses in macro argument");
   return false;
@@ -1595,6 +1689,9 @@
 // Parse the macro instantiation arguments.
 bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) {
   const unsigned NParameters = M ? M->Parameters.size() : 0;
+  // Argument delimiter is initially unknown. It will be set by
+  // ParseMacroArgument()
+  AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
 
   // Parse two kinds of macro invocations:
   // - macros defined without any parameters accept an arbitrary number of them
@@ -1603,7 +1700,7 @@
        ++Parameter) {
     MacroArgument MA;
 
-    if (ParseMacroArgument(MA))
+    if (ParseMacroArgument(MA, ArgumentDelimiter))
       return true;
 
     if (!MA.empty() || !NParameters)
@@ -3105,6 +3202,9 @@
     return TokError("expected identifier in '.macro' directive");
 
   MacroParameters Parameters;
+  // Argument delimiter is initially unknown. It will be set by
+  // ParseMacroArgument()
+  AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof;
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     for (;;) {
       MacroParameter Parameter;
@@ -3113,21 +3213,19 @@
 
       if (getLexer().is(AsmToken::Equal)) {
         Lex();
-        if (getParser().ParseMacroArgument(Parameter.second))
+        if (getParser().ParseMacroArgument(Parameter.second, ArgumentDelimiter))
           return true;
       }
 
       Parameters.push_back(Parameter);
 
-      if (getLexer().isNot(AsmToken::Comma))
+      if (getLexer().is(AsmToken::Comma))
+        Lex();
+      else if (getLexer().is(AsmToken::EndOfStatement))
         break;
-      Lex();
     }
   }
 
-  if (getLexer().isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in '.macro' directive");
-
   // Eat the end of statement.
   Lex();
 

Modified: llvm/trunk/lib/MC/MCParser/MCAsmLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/MCAsmLexer.cpp?rev=164248&r1=164247&r2=164248&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCParser/MCAsmLexer.cpp (original)
+++ llvm/trunk/lib/MC/MCParser/MCAsmLexer.cpp Wed Sep 19 15:36:12 2012
@@ -12,7 +12,8 @@
 
 using namespace llvm;
 
-MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) {
+MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()),
+                           TokStart(0), SkipSpace(true) {
 }
 
 MCAsmLexer::~MCAsmLexer() {

Added: llvm/trunk/test/MC/AsmParser/macros-darwin.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/macros-darwin.s?rev=164248&view=auto
==============================================================================
--- llvm/trunk/test/MC/AsmParser/macros-darwin.s (added)
+++ llvm/trunk/test/MC/AsmParser/macros-darwin.s Wed Sep 19 15:36:12 2012
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -triple i386-apple-darwin10 %s 2> %t.err | FileCheck %s
+
+.macro test1
+.globl "$0 $1 $2 $$3 $n"
+.endmacro
+
+// CHECK: .globl "1 23  $3 2"
+test1 1, 2 3
+

Modified: llvm/trunk/test/MC/AsmParser/macros.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/macros.s?rev=164248&r1=164247&r2=164248&view=diff
==============================================================================
--- llvm/trunk/test/MC/AsmParser/macros.s (original)
+++ llvm/trunk/test/MC/AsmParser/macros.s Wed Sep 19 15:36:12 2012
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err | FileCheck %s
+// RUN: not llvm-mc -triple i386-unknown-unknown %s 2> %t.err | FileCheck %s
 // RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err
 
 .macro .test0
@@ -28,33 +28,66 @@
 .globl "$0 $1 $2 $$3 $n"
 .endmacro
 
-// CHECK: .globl	"1 23  $3 2"
-test3 1,2 3
+// CHECK: .globl	"1 (23)  $3 2"
+test3 1, (2 3)
+
+// CHECK: .globl "1 2  $3 2"
+test3 1 2
 
 .macro test4
 .globl "$0 -- $1"
 .endmacro
 
-// CHECK: .globl	"ab)(,) -- (cd)"
-test4 a b)(,),(cd)
+// CHECK: .globl  "(ab)(,)) -- (cd)"
+test4 (a b)(,)),(cd)
+
+// CHECK: .globl  "(ab)(,)) -- (cd)"
+test4 (a b)(,)),(cd)
 
 .macro test5 _a
 .globl "\_a"
 .endm
 
-test5 zed1
 // CHECK: .globl zed1
+test5 zed1
 
 .macro test6 $a
 .globl "\$a"
 .endm
 
-test6 zed2
 // CHECK: .globl zed2
+test6 zed2
 
 .macro test7 .a
 .globl "\.a"
 .endm
 
-test7 zed3
 // CHECK: .globl zed3
+test7 zed3
+
+.macro test8 _a, _b, _c
+.globl "\_a,\_b,\_c"
+.endmacro
+
+.macro test9 _a _b _c
+.globl "\_a \_b \_c"
+.endmacro
+
+// CHECK: .globl  "a,b,c"
+test8 a, b, c
+// CHECK: .globl  "%1,%2,%3"
+test8 %1 %2 %3 #a comment
+// CHECK: .globl "x-y,z,1"
+test8 x - y z 1
+// CHECK: .globl  "1 2 3"
+test9 1, 2,3
+
+test8 1,2 3
+// CHECK-ERRORS: error: macro argument '_c' is missing
+// CHECK-ERRORS-NEXT: test8 1,2 3
+// CHECK-ERRORS-NEXT:           ^
+
+test8 1 2, 3
+// CHECK-ERRORS: error: expected ' ' for macro argument separator
+// CHECK-ERRORS-NEXT:test8 1 2, 3
+// CHECK-ERRORS-NEXT:         ^





More information about the llvm-commits mailing list