[llvm] 5afb360 - [ms] [llvm-ml] Allow arbitrary strings as integer constants
Eric Astor via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 6 14:15:57 PST 2020
Author: Eric Astor
Date: 2020-11-06T17:15:49-05:00
New Revision: 5afb36080809567edf5a7936821dcac5a064527b
URL: https://github.com/llvm/llvm-project/commit/5afb36080809567edf5a7936821dcac5a064527b
DIFF: https://github.com/llvm/llvm-project/commit/5afb36080809567edf5a7936821dcac5a064527b.diff
LOG: [ms] [llvm-ml] Allow arbitrary strings as integer constants
MASM interprets strings in expression contexts as integers expressed in big-endian base-256, treating each character as its ASCII representation.
This completely eliminates the need to special-case single-character strings.
Reviewed By: thakis
Differential Revision: https://reviews.llvm.org/D90788
Added:
llvm/test/tools/llvm-ml/strings_errors.test
Modified:
llvm/lib/MC/MCParser/MasmParser.cpp
llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
llvm/test/tools/llvm-ml/strings.test
Removed:
################################################################################
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 845a3cf13d2b..6df53fc511f4 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1332,6 +1332,8 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
/// primaryexpr ::= number
/// primaryexpr ::= '.'
/// primaryexpr ::= ~,+,-,'not' primaryexpr
+/// primaryexpr ::= string
+/// (a string is interpreted as a 64-bit number in big-endian base-256)
bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
AsmTypeInfo *TypeInfo) {
SMLoc FirstTokenLoc = getLexer().getLoc();
@@ -1350,7 +1352,6 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
return false;
case AsmToken::Dollar:
case AsmToken::At:
- case AsmToken::String:
case AsmToken::Identifier: {
StringRef Identifier;
if (parseIdentifier(Identifier)) {
@@ -1517,6 +1518,20 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
}
return false;
}
+ case AsmToken::String: {
+ // MASM strings (used as constants) are interpreted as big-endian base-256.
+ SMLoc ValueLoc = getTok().getLoc();
+ std::string Value;
+ if (parseEscapedString(Value))
+ return true;
+ if (Value.size() > 8)
+ return Error(ValueLoc, "literal value out of range");
+ uint64_t IntValue = 0;
+ for (const unsigned char CharVal : Value)
+ IntValue = (IntValue << 8) | CharVal;
+ Res = MCConstantExpr::create(IntValue, getContext());
+ return false;
+ }
case AsmToken::Real: {
APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
@@ -3168,28 +3183,17 @@ bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
bool MasmParser::parseScalarInitializer(unsigned Size,
SmallVectorImpl<const MCExpr *> &Values,
unsigned StringPadLength) {
- if (getTok().is(AsmToken::String)) {
+ if (Size == 1 && getTok().is(AsmToken::String)) {
std::string Value;
if (parseEscapedString(Value))
return true;
- if (Size == 1) {
- // Treat each character as an initializer.
- for (const char CharVal : Value)
- Values.push_back(MCConstantExpr::create(CharVal, getContext()));
-
- // Pad the string with spaces to the specified length.
- for (size_t i = Value.size(); i < StringPadLength; ++i)
- Values.push_back(MCConstantExpr::create(' ', getContext()));
- } else {
- // Treat the string as an initial value in big-endian representation.
- if (Value.size() > Size)
- return Error(getTok().getLoc(), "out of range literal value");
-
- uint64_t IntValue = 0;
- for (const unsigned char CharVal : Value)
- IntValue = (IntValue << 8) | CharVal;
- Values.push_back(MCConstantExpr::create(IntValue, getContext()));
- }
+ // Treat each character as an initializer.
+ for (const unsigned char CharVal : Value)
+ Values.push_back(MCConstantExpr::create(CharVal, getContext()));
+
+ // Pad the string with spaces to the specified length.
+ for (size_t i = Value.size(); i < StringPadLength; ++i)
+ Values.push_back(MCConstantExpr::create(' ', getContext()));
} else {
const MCExpr *Value;
if (parseExpression(Value))
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6d037ca14523..fd7beea803fd 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1693,20 +1693,25 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return Error(Tok.getLoc(), "unknown token in expression");
}
LLVM_FALLTHROUGH;
+ case AsmToken::String: {
+ if (Parser.isParsingMasm()) {
+ // MASM parsers handle strings in expressions as constants.
+ SMLoc ValueLoc = Tok.getLoc();
+ int64_t Res;
+ const MCExpr *Val;
+ if (Parser.parsePrimaryExpr(Val, End, nullptr))
+ return true;
+ UpdateLocLex = false;
+ if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
+ return Error(ValueLoc, "expected absolute value");
+ if (SM.onInteger(Res, ErrMsg))
+ return Error(ValueLoc, ErrMsg);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ }
case AsmToken::At:
- case AsmToken::String:
case AsmToken::Identifier: {
- if (Parser.isParsingMasm() && Tok.is(AsmToken::String)) {
- // Single-character strings should be treated as integer constants. This
- // includes MASM escapes for quotes.
- char Quote = Tok.getString().front();
- StringRef Contents = Tok.getStringContents();
- if (Contents.size() == 1 || Contents == std::string(2, Quote)) {
- if (SM.onInteger(Contents.front(), ErrMsg))
- return Error(Tok.getLoc(), ErrMsg);
- break;
- }
- }
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
UpdateLocLex = false;
diff --git a/llvm/test/tools/llvm-ml/strings.test b/llvm/test/tools/llvm-ml/strings.test
index 5064a458b7ec..c43f39d42c3f 100644
--- a/llvm/test/tools/llvm-ml/strings.test
+++ b/llvm/test/tools/llvm-ml/strings.test
@@ -119,4 +119,25 @@ dq_char_test PROC
ret
dq_char_test ENDP
+string_constant_test PROC
+; CHECK-LABEL: string_constant_test:
+
+ mov eax, 'ab'
+ mov eax, "ab"
+; CHECK: mov eax, 24930
+; CHECK: mov eax, 24930
+
+ mov eax, "abc"
+ mov eax, 'abc'
+; CHECK: mov eax, 6382179
+; CHECK: mov eax, 6382179
+
+ mov eax, "abc"""
+ mov eax, 'abc'''
+; CHECK: mov eax, 1633837858
+; CHECK: mov eax, 1633837863
+
+ ret
+string_constant_test ENDP
+
end
diff --git a/llvm/test/tools/llvm-ml/strings_errors.test b/llvm/test/tools/llvm-ml/strings_errors.test
new file mode 100644
index 000000000000..377ad29c06ff
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/strings_errors.test
@@ -0,0 +1,15 @@
+; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s --implicit-check-not=error:
+
+.code
+
+oversize_string_test PROC
+
+ mov rax, "abcdefghi"
+ mov rax, 'abcdefghi'
+; CHECK: error: literal value out of range
+; CHECK: error: literal value out of range
+
+ ret
+oversize_string_test ENDP
+
+end
More information about the llvm-commits mailing list