[llvm] feb7453 - [ms] [llvm-ml] Accept whitespace around the dot operator

Eric Astor via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 29 14:01:32 PDT 2020


Author: Eric Astor
Date: 2020-09-29T17:01:13-04:00
New Revision: feb74530f86516de211d8b91eab426fc39d1b3e8

URL: https://github.com/llvm/llvm-project/commit/feb74530f86516de211d8b91eab426fc39d1b3e8
DIFF: https://github.com/llvm/llvm-project/commit/feb74530f86516de211d8b91eab426fc39d1b3e8.diff

LOG: [ms] [llvm-ml] Accept whitespace around the dot operator

MASM allows arbitrary whitespace around the Intel dot operator, especially when used for struct field lookup.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D88450

Added: 
    llvm/test/tools/llvm-ml/dot_operator.test

Modified: 
    llvm/lib/MC/MCParser/MasmParser.cpp
    llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
    llvm/test/tools/llvm-ml/struct.test

Removed: 
    


################################################################################
diff  --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 0d5d6a112902..c574b8715b16 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -6728,6 +6728,8 @@ bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
     Info.Type.Length = Field.LengthOf;
     if (Field.Contents.FT == FT_STRUCT)
       Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
+    else
+      Info.Type.Name = "";
     return false;
   }
 

diff  --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 7a7c81000a2c..1f594c54c410 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1674,6 +1674,18 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
       if (ParseIntelDotOperator(SM, End))
         return true;
       break;
+    case AsmToken::Dot:
+      if (!Parser.isParsingMasm()) {
+        if ((Done = SM.isValidEndState()))
+          break;
+        return Error(Tok.getLoc(), "unknown token in expression");
+      }
+      // MASM allows spaces around the dot operator (e.g., "var . x")
+      Lex();
+      UpdateLocLex = false;
+      if (ParseIntelDotOperator(SM, End))
+        return true;
+      break;
     case AsmToken::Dollar:
       if (!Parser.isParsingMasm()) {
         if ((Done = SM.isValidEndState()))
@@ -1687,6 +1699,23 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
       SMLoc IdentLoc = Tok.getLoc();
       StringRef Identifier = Tok.getString();
       UpdateLocLex = false;
+      if (Parser.isParsingMasm()) {
+        size_t DotOffset = Identifier.find_first_of('.');
+        if (DotOffset != StringRef::npos) {
+          consumeToken();
+          StringRef LHS = Identifier.slice(0, DotOffset);
+          StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
+          StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
+          if (!RHS.empty()) {
+            getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
+          }
+          getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
+          if (!LHS.empty()) {
+            getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
+          }
+          break;
+        }
+      }
       // (MASM only) <TYPE> PTR operator
       if (Parser.isParsingMasm()) {
         const AsmToken &NextTok = getLexer().peekTok();
@@ -1744,7 +1773,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
       }
       // Symbol reference, when parsing assembly content
       InlineAsmIdentifierInfo Info;
-      AsmTypeInfo Type;
+      AsmFieldInfo FieldInfo;
       const MCExpr *Val;
       if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
         // MS Dot Operator expression
@@ -1761,8 +1790,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
           if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
             if (SM.onInteger(Val, ErrMsg))
               return Error(IdentLoc, ErrMsg);
-          } else
+          } else {
             return true;
+          }
           break;
         }
         // MS InlineAsm identifier
@@ -1771,7 +1801,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
           return Error(IdentLoc, "expected identifier");
         if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
           return true;
-        else if (SM.onIdentifierExpr(Val, Identifier, Info, Type, true, ErrMsg))
+        else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
+                                     true, ErrMsg))
           return Error(IdentLoc, ErrMsg);
         break;
       }
@@ -1784,11 +1815,35 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
             return Error(IdentLoc, ErrMsg);
           break;
         }
+        if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
+          // Field offset immediate; <TYPE>.<field specification>
+          Lex(); // eat type
+          bool EndDot = parseOptionalToken(AsmToken::Dot);
+          while (EndDot || (getTok().is(AsmToken::Identifier) &&
+                            getTok().getString().startswith("."))) {
+            getParser().parseIdentifier(Identifier);
+            if (!EndDot)
+              Identifier.consume_front(".");
+            EndDot = Identifier.consume_back(".");
+            if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
+                                        FieldInfo)) {
+              SMLoc IDEnd =
+                  SMLoc::getFromPointer(Identifier.data() + Identifier.size());
+              return Error(IdentLoc, "Unable to lookup field reference!",
+                           SMRange(IdentLoc, IDEnd));
+            }
+            if (!EndDot)
+              EndDot = parseOptionalToken(AsmToken::Dot);
+          }
+          if (SM.onInteger(FieldInfo.Offset, ErrMsg))
+            return Error(IdentLoc, ErrMsg);
+          break;
+        }
       }
-      if (getParser().parsePrimaryExpr(Val, End, &Type)) {
+      if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
         return Error(Tok.getLoc(), "Unexpected identifier!");
-      } else if (SM.onIdentifierExpr(Val, Identifier, Info, Type, false,
-                                     ErrMsg)) {
+      } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
+                                     false, ErrMsg)) {
         return Error(IdentLoc, ErrMsg);
       }
       break;
@@ -2006,6 +2061,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
   StringRef DotDispStr = Tok.getString();
   if (DotDispStr.startswith("."))
     DotDispStr = DotDispStr.drop_front(1);
+  StringRef TrailingDot;
 
   // .Imm gets lexed as a real.
   if (Tok.is(AsmToken::Real)) {
@@ -2014,6 +2070,10 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
     Info.Offset = DotDisp.getZExtValue();
   } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
              Tok.is(AsmToken::Identifier)) {
+    if (DotDispStr.endswith(".")) {
+      TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
+      DotDispStr = DotDispStr.drop_back(1);
+    }
     const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
     const StringRef Base = BaseMember.first, Member = BaseMember.second;
     if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
@@ -2031,6 +2091,8 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
   const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
   while (Tok.getLoc().getPointer() < DotExprEndLoc)
     Lex();
+  if (!TrailingDot.empty())
+    getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
   SM.addImm(Info.Offset);
   SM.setTypeInfo(Info.Type);
   return false;

diff  --git a/llvm/test/tools/llvm-ml/dot_operator.test b/llvm/test/tools/llvm-ml/dot_operator.test
new file mode 100644
index 000000000000..bbea6152e8db
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/dot_operator.test
@@ -0,0 +1,67 @@
+# RUN: llvm-ml -filetype=asm %s | FileCheck %s
+
+.data
+
+FOO STRUCT
+  a BYTE ?
+  b BYTE ?
+  c BYTE ?
+  d BYTE ?
+FOO ENDS
+
+BAR STRUCT
+  e WORD ?
+  f WORD ?
+BAR ENDS
+
+var FOO <>
+
+.code
+
+t1:
+mov al, var.a
+mov al, var. b
+mov al, var .c
+mov al, var . d
+
+; CHECK-LABEL: t1:
+; CHECK: mov al, byte ptr [rip + var]
+; CHECK: mov al, byte ptr [rip + var+1]
+; CHECK: mov al, byte ptr [rip + var+2]
+; CHECK: mov al, byte ptr [rip + var+3]
+
+t2:
+mov eax, FOO.a
+mov ax, FOO. b
+mov al, FOO .c
+mov eax, FOO . d
+
+; CHECK-LABEL: t2:
+; CHECK: mov eax, 0
+; CHECK: mov ax, 1
+; CHECK: mov al, 2
+; CHECK: mov eax, 3
+
+t3:
+mov al, BYTE PTR var[FOO.c]
+
+; CHECK-LABEL: t3:
+; CHECK: mov al, byte ptr [rip + var+2]
+
+t4:
+mov ax, var.BAR.f
+mov ax, var .BAR.f
+mov ax, var. BAR.f
+mov ax, var.BAR .f
+mov ax, var.BAR. f
+mov ax, var . BAR . f
+
+; CHECK-LABEL: t4:
+; CHECK: mov ax, word ptr [rip + var+2]
+; CHECK: mov ax, word ptr [rip + var+2]
+; CHECK: mov ax, word ptr [rip + var+2]
+; CHECK: mov ax, word ptr [rip + var+2]
+; CHECK: mov ax, word ptr [rip + var+2]
+; CHECK: mov ax, word ptr [rip + var+2]
+
+END

diff  --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test
index facd7c14e4f4..479d31c8121f 100644
--- a/llvm/test/tools/llvm-ml/struct.test
+++ b/llvm/test/tools/llvm-ml/struct.test
@@ -140,7 +140,7 @@ mov al, [t2.FOOBAR.e.b]
 ; CHECK-NEXT: mov al, byte ptr [rip + t2+9]
 ; CHECK-NEXT: mov al, byte ptr [rip + t2+9]
 ; CHECK-NEXT: mov al, byte ptr [rip + t2+9]
-; CHECK-NEXT: mov al, byte ptr [rip + (t2+8)+1]
+; CHECK-NEXT: mov al, byte ptr [rip + t2+9]
 ; CHECK-NEXT: mov al, byte ptr [rip + t2+9]
 
 QUUX STRUCT


        


More information about the llvm-commits mailing list