[llvm] feb7453 - [ms] [llvm-ml] Accept whitespace around the dot operator
Eric Astor via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 29 14:01:32 PDT 2020
Author: Eric Astor
Date: 2020-09-29T17:01:13-04:00
New Revision: feb74530f86516de211d8b91eab426fc39d1b3e8
URL: https://github.com/llvm/llvm-project/commit/feb74530f86516de211d8b91eab426fc39d1b3e8
DIFF: https://github.com/llvm/llvm-project/commit/feb74530f86516de211d8b91eab426fc39d1b3e8.diff
LOG: [ms] [llvm-ml] Accept whitespace around the dot operator
MASM allows arbitrary whitespace around the Intel dot operator (e.g., "var . x"), especially when it is used for struct field lookup.
Reviewed By: rnk
Differential Revision: https://reviews.llvm.org/D88450
Added:
llvm/test/tools/llvm-ml/dot_operator.test
Modified:
llvm/lib/MC/MCParser/MasmParser.cpp
llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
llvm/test/tools/llvm-ml/struct.test
Removed:
################################################################################
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 0d5d6a112902..c574b8715b16 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -6728,6 +6728,8 @@ bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
Info.Type.Length = Field.LengthOf;
if (Field.Contents.FT == FT_STRUCT)
Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
+ else
+ Info.Type.Name = "";
return false;
}
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 7a7c81000a2c..1f594c54c410 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1674,6 +1674,18 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if (ParseIntelDotOperator(SM, End))
return true;
break;
+ case AsmToken::Dot:
+ if (!Parser.isParsingMasm()) {
+ if ((Done = SM.isValidEndState()))
+ break;
+ return Error(Tok.getLoc(), "unknown token in expression");
+ }
+ // MASM allows spaces around the dot operator (e.g., "var . x")
+ Lex();
+ UpdateLocLex = false;
+ if (ParseIntelDotOperator(SM, End))
+ return true;
+ break;
case AsmToken::Dollar:
if (!Parser.isParsingMasm()) {
if ((Done = SM.isValidEndState()))
@@ -1687,6 +1699,23 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
UpdateLocLex = false;
+ if (Parser.isParsingMasm()) {
+ size_t DotOffset = Identifier.find_first_of('.');
+ if (DotOffset != StringRef::npos) {
+ consumeToken();
+ StringRef LHS = Identifier.slice(0, DotOffset);
+ StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
+ StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
+ if (!RHS.empty()) {
+ getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
+ }
+ getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
+ if (!LHS.empty()) {
+ getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
+ }
+ break;
+ }
+ }
// (MASM only) <TYPE> PTR operator
if (Parser.isParsingMasm()) {
const AsmToken &NextTok = getLexer().peekTok();
@@ -1744,7 +1773,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
}
// Symbol reference, when parsing assembly content
InlineAsmIdentifierInfo Info;
- AsmTypeInfo Type;
+ AsmFieldInfo FieldInfo;
const MCExpr *Val;
if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
// MS Dot Operator expression
@@ -1761,8 +1790,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
if (SM.onInteger(Val, ErrMsg))
return Error(IdentLoc, ErrMsg);
- } else
+ } else {
return true;
+ }
break;
}
// MS InlineAsm identifier
@@ -1771,7 +1801,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return Error(IdentLoc, "expected identifier");
if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
return true;
- else if (SM.onIdentifierExpr(Val, Identifier, Info, Type, true, ErrMsg))
+ else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
+ true, ErrMsg))
return Error(IdentLoc, ErrMsg);
break;
}
@@ -1784,11 +1815,35 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return Error(IdentLoc, ErrMsg);
break;
}
+ if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
+ // Field offset immediate; <TYPE>.<field specification>
+ Lex(); // eat type
+ bool EndDot = parseOptionalToken(AsmToken::Dot);
+ while (EndDot || (getTok().is(AsmToken::Identifier) &&
+ getTok().getString().startswith("."))) {
+ getParser().parseIdentifier(Identifier);
+ if (!EndDot)
+ Identifier.consume_front(".");
+ EndDot = Identifier.consume_back(".");
+ if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
+ FieldInfo)) {
+ SMLoc IDEnd =
+ SMLoc::getFromPointer(Identifier.data() + Identifier.size());
+ return Error(IdentLoc, "Unable to lookup field reference!",
+ SMRange(IdentLoc, IDEnd));
+ }
+ if (!EndDot)
+ EndDot = parseOptionalToken(AsmToken::Dot);
+ }
+ if (SM.onInteger(FieldInfo.Offset, ErrMsg))
+ return Error(IdentLoc, ErrMsg);
+ break;
+ }
}
- if (getParser().parsePrimaryExpr(Val, End, &Type)) {
+ if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
return Error(Tok.getLoc(), "Unexpected identifier!");
- } else if (SM.onIdentifierExpr(Val, Identifier, Info, Type, false,
- ErrMsg)) {
+ } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
+ false, ErrMsg)) {
return Error(IdentLoc, ErrMsg);
}
break;
@@ -2006,6 +2061,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
StringRef DotDispStr = Tok.getString();
if (DotDispStr.startswith("."))
DotDispStr = DotDispStr.drop_front(1);
+ StringRef TrailingDot;
// .Imm gets lexed as a real.
if (Tok.is(AsmToken::Real)) {
@@ -2014,6 +2070,10 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
Info.Offset = DotDisp.getZExtValue();
} else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
Tok.is(AsmToken::Identifier)) {
+ if (DotDispStr.endswith(".")) {
+ TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
+ DotDispStr = DotDispStr.drop_back(1);
+ }
const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
const StringRef Base = BaseMember.first, Member = BaseMember.second;
if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
@@ -2031,6 +2091,8 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
while (Tok.getLoc().getPointer() < DotExprEndLoc)
Lex();
+ if (!TrailingDot.empty())
+ getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
SM.addImm(Info.Offset);
SM.setTypeInfo(Info.Type);
return false;
diff --git a/llvm/test/tools/llvm-ml/dot_operator.test b/llvm/test/tools/llvm-ml/dot_operator.test
new file mode 100644
index 000000000000..bbea6152e8db
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/dot_operator.test
@@ -0,0 +1,67 @@
+# RUN: llvm-ml -filetype=asm %s | FileCheck %s
+
+.data
+
+FOO STRUCT
+ a BYTE ?
+ b BYTE ?
+ c BYTE ?
+ d BYTE ?
+FOO ENDS
+
+BAR STRUCT
+ e WORD ?
+ f WORD ?
+BAR ENDS
+
+var FOO <>
+
+.code
+
+t1:
+mov al, var.a
+mov al, var. b
+mov al, var .c
+mov al, var . d
+
+; CHECK-LABEL: t1:
+; CHECK: mov al, byte ptr [rip + var]
+; CHECK: mov al, byte ptr [rip + var+1]
+; CHECK: mov al, byte ptr [rip + var+2]
+; CHECK: mov al, byte ptr [rip + var+3]
+
+t2:
+mov eax, FOO.a
+mov ax, FOO. b
+mov al, FOO .c
+mov eax, FOO . d
+
+; CHECK-LABEL: t2:
+; CHECK: mov eax, 0
+; CHECK: mov ax, 1
+; CHECK: mov al, 2
+; CHECK: mov eax, 3
+
+t3:
+mov al, BYTE PTR var[FOO.c]
+
+; CHECK-LABEL: t3:
+; CHECK: mov al, byte ptr [rip + var+2]
+
+t4:
+mov ax, var.BAR.f
+mov ax, var .BAR.f
+mov ax, var. BAR.f
+mov ax, var.BAR .f
+mov ax, var.BAR. f
+mov ax, var . BAR . f
+
+; CHECK-LABEL: t4:
+; CHECK: mov ax, word ptr [rip + var+2]
+; CHECK: mov ax, word ptr [rip + var+2]
+; CHECK: mov ax, word ptr [rip + var+2]
+; CHECK: mov ax, word ptr [rip + var+2]
+; CHECK: mov ax, word ptr [rip + var+2]
+; CHECK: mov ax, word ptr [rip + var+2]
+
+END
diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test
index facd7c14e4f4..479d31c8121f 100644
--- a/llvm/test/tools/llvm-ml/struct.test
+++ b/llvm/test/tools/llvm-ml/struct.test
@@ -140,7 +140,7 @@ mov al, [t2.FOOBAR.e.b]
; CHECK-NEXT: mov al, byte ptr [rip + t2+9]
; CHECK-NEXT: mov al, byte ptr [rip + t2+9]
; CHECK-NEXT: mov al, byte ptr [rip + t2+9]
-; CHECK-NEXT: mov al, byte ptr [rip + (t2+8)+1]
+; CHECK-NEXT: mov al, byte ptr [rip + t2+9]
; CHECK-NEXT: mov al, byte ptr [rip + t2+9]
QUUX STRUCT
More information about the llvm-commits mailing list