[llvm] c2e272f - [ms] [llvm-ml] Improve data support, adding names and complex initializers.
Eric Astor via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 12:40:27 PST 2020
Author: Eric Astor
Date: 2020-02-24T15:40:04-05:00
New Revision: c2e272f8cf76ec97f675e0dfdada75445bbee5c5
URL: https://github.com/llvm/llvm-project/commit/c2e272f8cf76ec97f675e0dfdada75445bbee5c5
DIFF: https://github.com/llvm/llvm-project/commit/c2e272f8cf76ec97f675e0dfdada75445bbee5c5.diff
LOG: [ms] [llvm-ml] Improve data support, adding names and complex initializers.
Summary: Add support for ?, DUP, and string initializers, as well as MASM syntax for named data locations.
Reviewers: rnk, thakis
Reviewed By: thakis
Subscribers: merge_guards_bot, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D73226
Added:
llvm/test/tools/llvm-ml/basic_data.test
Modified:
llvm/lib/MC/MCParser/MasmParser.cpp
Removed:
################################################################################
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 99ede023f0bd..55a2d43dfe5a 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -482,12 +482,25 @@ class MasmParser : public MCAsmParser {
/// Codeview def_range types parsed by this class.
StringMap<CVDefRangeType> CVDefRangeTypeMap;
+ bool parseInitValue(unsigned Size);
+
// ".ascii", ".asciz", ".string"
bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
- bool parseDirectiveValue(StringRef IDVal,
- unsigned Size); // "byte", "word", ...
- bool parseDirectiveRealValue(StringRef IDVal,
- const fltSemantics &); // "real4", ...
+
+ // "byte", "word", ...
+ bool parseScalarInstList(unsigned Size,
+ SmallVectorImpl<const MCExpr *> &Values);
+ bool parseDirectiveValue(StringRef IDVal, unsigned Size);
+ bool parseDirectiveNamedValue(StringRef IDVal, unsigned Size, StringRef Name,
+ SMLoc NameLoc);
+
+ // "real4", "real8"
+ bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics);
+ bool parseRealInstList(const fltSemantics &Semantics,
+ SmallVectorImpl<APInt> &Values);
+ bool parseDirectiveNamedRealValue(StringRef IDVal,
+ const fltSemantics &Semantics,
+ StringRef Name, SMLoc NameLoc);
// "=", "equ", "textequ"
bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
@@ -1903,6 +1916,33 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
case DK_TEXTEQU:
Lex();
return parseDirectiveEquate(nextVal, IDVal, DirKind);
+ case DK_BYTE:
+ case DK_DB:
+ Lex();
+ return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
+ case DK_WORD:
+ case DK_DW:
+ Lex();
+ return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
+ case DK_DWORD:
+ case DK_DD:
+ Lex();
+ return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
+ case DK_FWORD:
+ Lex();
+ return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
+ case DK_QWORD:
+ case DK_DQ:
+ Lex();
+ return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
+ case DK_REAL4:
+ Lex();
+ return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), IDVal,
+ IDLoc);
+ case DK_REAL8:
+ Lex();
+ return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal,
+ IDLoc);
}
// __asm _emit or __asm __emit
@@ -2739,31 +2779,99 @@ bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
return false;
}
+bool MasmParser::parseScalarInstList(unsigned Size,
+ SmallVectorImpl<const MCExpr *> &Values) {
+ do {
+ if (getTok().is(AsmToken::String)) {
+ StringRef Value = getTok().getStringContents();
+ if (Size == 1) {
+ // Treat each character as an initializer.
+ for (const char CharVal : Value)
+ Values.push_back(MCConstantExpr::create(CharVal, getContext()));
+ } else {
+ // Treat the string as an initial value in big-endian representation.
+ if (Value.size() > Size)
+ return Error(getTok().getLoc(), "out of range literal value");
+
+ uint64_t IntValue = 0;
+ for (const unsigned char CharVal : Value.bytes())
+ IntValue = (IntValue << 8) | CharVal;
+ Values.push_back(MCConstantExpr::create(IntValue, getContext()));
+ }
+ Lex();
+ } else {
+ const MCExpr *Value;
+ if (checkForValidSection() || parseExpression(Value))
+ return true;
+ if (getTok().is(AsmToken::Identifier) &&
+ getTok().getString().equals_lower("dup")) {
+ Lex(); // eat 'dup'
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+ if (!MCE)
+ return Error(Value->getLoc(),
+ "cannot repeat value a non-constant number of times");
+ const int64_t Repetitions = MCE->getValue();
+ if (Repetitions < 0)
+ return Error(Value->getLoc(),
+ "cannot repeat value a negative number of times");
+
+ SmallVector<const MCExpr *, 1> DuplicatedValues;
+ if (parseToken(AsmToken::LParen,
+ "parentheses required for 'dup' contents") ||
+ parseScalarInstList(Size, DuplicatedValues) ||
+ parseToken(AsmToken::RParen, "unmatched parentheses"))
+ return true;
+
+ for (int i = 0; i < Repetitions; ++i)
+ Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
+ } else {
+ Values.push_back(Value);
+ }
+ }
+
+ // Continue if we see a comma. (Also, allow line continuation.)
+ } while (parseOptionalToken(AsmToken::Comma) &&
+ (getTok().isNot(AsmToken::EndOfStatement) ||
+ !parseToken(AsmToken::EndOfStatement)));
+
+ return false;
+}
+
/// parseDirectiveValue
/// ::= (byte | word | ... ) [ expression (, expression)* ]
bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
- auto parseOp = [&]() -> bool {
- const MCExpr *Value;
- SMLoc ExprLoc = getLexer().getLoc();
- if (checkForValidSection() || parseExpression(Value))
- return true;
+ SmallVector<const MCExpr *, 1> Values;
+ if (parseScalarInstList(Size, Values))
+ return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+
+ for (const MCExpr *Value : Values) {
// Special case constant expressions to match code generator.
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
assert(Size <= 8 && "Invalid size");
int64_t IntValue = MCE->getValue();
if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
- return Error(ExprLoc, "out of range literal value");
+ return Error(MCE->getLoc(), "out of range literal value");
getStreamer().emitIntValue(IntValue, Size);
- } else
- getStreamer().emitValue(Value, Size, ExprLoc);
- return false;
- };
-
- if (parseMany(parseOp))
- return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+ } else if (const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
+ MSE && MSE->getSymbol().getName() == "?") {
+ // ? initializer; treat as 0.
+ getStreamer().emitIntValue(0, Size);
+ } else {
+ getStreamer().emitValue(Value, Size, Value->getLoc());
+ }
+ }
return false;
}
+/// parseDirectiveNamedValue
+/// ::= name (byte | word | ... ) [ expression (, expression)* ]
+bool MasmParser::parseDirectiveNamedValue(StringRef IDVal, unsigned Size,
+ StringRef Name, SMLoc NameLoc) {
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ getStreamer().emitLabel(Sym);
+ return parseDirectiveValue(IDVal, Size);
+}
+
static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
if (Asm.getTok().isNot(AsmToken::Integer) &&
Asm.getTok().isNot(AsmToken::BigNum))
@@ -2824,24 +2932,75 @@ bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
return false;
}
+bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
+ SmallVectorImpl<APInt> &ValuesAsInt) {
+ do {
+ const AsmToken NextTok = Lexer.peekTok();
+ if (NextTok.is(AsmToken::Identifier) &&
+ NextTok.getString().equals_lower("dup")) {
+ const MCExpr *Value;
+ if (parseExpression(Value) || parseToken(AsmToken::Identifier))
+ return true;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+ if (!MCE)
+ return Error(Value->getLoc(),
+ "cannot repeat value a non-constant number of times");
+ const int64_t Repetitions = MCE->getValue();
+ if (Repetitions < 0)
+ return Error(Value->getLoc(),
+ "cannot repeat value a negative number of times");
+
+ SmallVector<APInt, 1> DuplicatedValues;
+ if (parseToken(AsmToken::LParen,
+ "parentheses required for 'dup' contents") ||
+ parseRealInstList(Semantics, DuplicatedValues) ||
+ parseToken(AsmToken::RParen, "unmatched parentheses"))
+ return true;
+
+ for (int i = 0; i < Repetitions; ++i)
+ ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
+ } else {
+ APInt AsInt;
+ if (parseRealValue(Semantics, AsInt))
+ return true;
+ ValuesAsInt.push_back(AsInt);
+ }
+ // Continue if we see a comma. (Also, allow line continuation.)
+ } while (parseOptionalToken(AsmToken::Comma) &&
+ (getTok().isNot(AsmToken::EndOfStatement) ||
+ !parseToken(AsmToken::EndOfStatement)));
+
+ return false;
+}
+
/// parseDirectiveRealValue
/// ::= (real4 | real8) [ expression (, expression)* ]
bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
const fltSemantics &Semantics) {
- auto parseOp = [&]() -> bool {
- APInt AsInt;
- if (checkForValidSection() || parseRealValue(Semantics, AsInt))
- return true;
- getStreamer().emitIntValue(AsInt.getLimitedValue(),
- AsInt.getBitWidth() / 8);
- return false;
- };
+ if (checkForValidSection())
+ return true;
- if (parseMany(parseOp))
+ SmallVector<APInt, 1> ValuesAsInt;
+ if (parseRealInstList(Semantics, ValuesAsInt))
return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+
+ for (const APInt &AsInt : ValuesAsInt) {
+ getStreamer().emitIntValue(AsInt.getLimitedValue(),
+ AsInt.getBitWidth() / 8);
+ }
return false;
}
+/// parseDirectiveNamedRealValue
+/// ::= name (real4 | real8) [ expression (, expression)* ]
+bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal,
+ const fltSemantics &Semantics,
+ StringRef Name, SMLoc NameLoc) {
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ getStreamer().emitLabel(Sym);
+ return parseDirectiveRealValue(IDVal, Semantics);
+}
+
/// parseDirectiveOrg
/// ::= .org expression [ , expression ]
bool MasmParser::parseDirectiveOrg() {
diff --git a/llvm/test/tools/llvm-ml/basic_data.test b/llvm/test/tools/llvm-ml/basic_data.test
new file mode 100644
index 000000000000..d39775fb6646
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/basic_data.test
@@ -0,0 +1,38 @@
+# RUN: llvm-ml -filetype=asm %s | FileCheck %s
+
+.data
+BYTE 2, 4, 6, 8
+; CHECK: .data
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .byte 8
+
+BYTE 2 dup (1, 2 dup (2)),
+ 3
+; CHECK: .byte 1
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 3
+
+REAL4 1, 0
+; CHECK: .long 1065353216
+; CHECK-NEXT: .long 0
+
+REAL4 2 DUP (2.5, 2 dup (0)),
+ 4
+; CHECK: .long 1075838976
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1075838976
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1082130432
+
+.code
+BYTE 5
+; CHECK: .text
+; CHECK-NEXT: .byte 5
More information about the llvm-commits mailing list