[llvm] 5dd1b6d - [ms] [llvm-ml] Add support for .radix directive, and accept all radix specifiers
Eric Astor via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 23 10:46:39 PDT 2020
Author: Eric Astor
Date: 2020-09-23T13:45:58-04:00
New Revision: 5dd1b6d612655c9006ba97a8b6487ded80719b48
URL: https://github.com/llvm/llvm-project/commit/5dd1b6d612655c9006ba97a8b6487ded80719b48
DIFF: https://github.com/llvm/llvm-project/commit/5dd1b6d612655c9006ba97a8b6487ded80719b48.diff
LOG: [ms] [llvm-ml] Add support for .radix directive, and accept all radix specifiers
Add support for .radix directive, and radix specifiers [yY] (binary), [oOqQ] (octal), and [tT] (decimal).
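Also, when lexing MASM integers, apply a non-default radix only when the literal carries a radix specifier; MASM requires that every literal without a radix specifier be interpreted in the current default radix (e.g., with the default radix of 10, 0100 = 100 rather than octal 64).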
Reviewed By: thakis
Differential Revision: https://reviews.llvm.org/D87400
Added:
llvm/test/tools/llvm-ml/radix.test
llvm/test/tools/llvm-ml/radix_errors.test
Modified:
llvm/include/llvm/MC/MCParser/MCAsmLexer.h
llvm/lib/MC/MCParser/AsmLexer.cpp
llvm/lib/MC/MCParser/COFFMasmParser.cpp
llvm/lib/MC/MCParser/MasmParser.cpp
llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index e89abeaac94c..a9481a02f098 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -50,6 +50,7 @@ class MCAsmLexer {
bool AllowAtInIdentifier;
bool IsAtStartOfStatement = true;
bool LexMasmIntegers = false;
+ unsigned DefaultRadix = 10;
AsmCommentConsumer *CommentConsumer = nullptr;
MCAsmLexer();
@@ -143,6 +144,9 @@ class MCAsmLexer {
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
+ /// Returns the radix currently stored in DefaultRadix (initialized to 10).
+ unsigned getDefaultRadix() const { return DefaultRadix; }
+ /// Stores \p Radix as the new default radix. No range check is performed
+ /// here; callers are expected to pass a value they have already validated.
+ void setDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
+
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
this->CommentConsumer = CommentConsumer;
}
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 5a571c7c0c0e..b1383fafb0dc 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
@@ -271,6 +272,13 @@ static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
return DefaultRadix;
}
+/// Skips over the run of characters starting at \p CurPtr whose
+/// hexDigitValue() is below \p DefaultRadix.
+///
+/// \returns a pointer to the first character that is not such a digit, i.e.
+/// one past the last digit of the run (or \p CurPtr itself if the run is
+/// empty).
+static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
+  const char *Cursor = CurPtr;
+  for (; hexDigitValue(*Cursor) < DefaultRadix; ++Cursor)
+    ;
+  return Cursor;
+}
+
static AsmToken intToken(StringRef Ref, APInt &Value)
{
if (Value.isIntN(64))
@@ -278,6 +286,21 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
return AsmToken(AsmToken::BigNum, Ref, Value);
}
+/// Returns the name used in diagnostics for a numeric base: "binary",
+/// "octal", "decimal" or "hexadecimal" for 2, 8, 10 and 16 respectively, and
+/// "base-N" for any other radix N.
+static std::string radixName(unsigned Radix) {
+  if (Radix == 2)
+    return "binary";
+  if (Radix == 8)
+    return "octal";
+  if (Radix == 10)
+    return "decimal";
+  if (Radix == 16)
+    return "hexadecimal";
+  // No conventional name: spell the base out numerically.
+  return "base-" + std::to_string(Radix);
+}
+
/// LexDigit: First character is [0-9].
/// Local Label: [0-9][:]
/// Forward/Backward Label: [0-9][fb]
@@ -286,16 +309,46 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
/// Decimal integer: [1-9][0-9]*
AsmToken AsmLexer::LexDigit() {
- // MASM-flavor binary integer: [01]+[bB]
+ // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 12, [bByY])
+ // MASM-flavor octal integer: [0-7]+[oOqQ]
+ // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 14, [dDtT])
// MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
if (LexMasmIntegers && isdigit(CurPtr[-1])) {
- const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
- CurPtr - 1 : nullptr;
+ const char *FirstNonBinary =
+ (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
+ const char *FirstNonDecimal =
+ (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
const char *OldCurPtr = CurPtr;
while (isHexDigit(*CurPtr)) {
- if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
- FirstNonBinary = CurPtr;
+ switch (*CurPtr) {
+ default:
+ if (!FirstNonDecimal) {
+ FirstNonDecimal = CurPtr;
+ }
+ LLVM_FALLTHROUGH;
+ case '9':
+ case '8':
+ case '7':
+ case '6':
+ case '5':
+ case '4':
+ case '3':
+ case '2':
+ if (!FirstNonBinary) {
+ FirstNonBinary = CurPtr;
+ }
+ break;
+ case '1':
+ case '0':
+ break;
+ }
+ ++CurPtr;
+ }
+ if (*CurPtr == '.') {
+ // MASM float literals (other than hex floats) always contain a ".", and
+ // are always written in decimal.
++CurPtr;
+ return LexFloatLiteral();
}
unsigned Radix = 0;
@@ -303,28 +356,61 @@ AsmToken AsmLexer::LexDigit() {
// hexadecimal number
++CurPtr;
Radix = 16;
+ } else if (*CurPtr == 't' || *CurPtr == 'T') {
+ // decimal number
+ ++CurPtr;
+ Radix = 10;
+ } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
+ *CurPtr == 'Q') {
+ // octal number
+ ++CurPtr;
+ Radix = 8;
+ } else if (*CurPtr == 'y' || *CurPtr == 'Y') {
+ // binary number
+ ++CurPtr;
+ Radix = 2;
+ } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
+ DefaultRadix < 14 &&
+ (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
+ Radix = 10;
} else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
- (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
+ DefaultRadix < 12 &&
+ (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
Radix = 2;
+ }
- if (Radix == 2 || Radix == 16) {
+ if (Radix) {
StringRef Result(TokStart, CurPtr - TokStart);
APInt Value(128, 0, true);
if (Result.drop_back().getAsInteger(Radix, Value))
- return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
- "invalid hexdecimal number");
+ return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
// MSVC accepts and ignores type suffices on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
return intToken(Result, Value);
- }
+ }
- // octal/decimal integers, or floating point numbers, fall through
+ // default-radix integers, or floating point numbers, fall through
CurPtr = OldCurPtr;
}
+ // MASM default-radix integers: [0-9a-fA-F]+
+ // (All other integer literals have a radix specifier.)
+ if (LexMasmIntegers) {
+ CurPtr = findLastDigit(CurPtr, 16);
+ StringRef Result(TokStart, CurPtr - TokStart);
+
+ APInt Value(128, 0, true);
+ if (Result.getAsInteger(DefaultRadix, Value)) {
+ return ReturnError(TokStart,
+ "invalid " + radixName(DefaultRadix) + " number");
+ }
+
+ return intToken(Result, Value);
+ }
+
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
@@ -339,13 +425,9 @@ AsmToken AsmLexer::LexDigit() {
StringRef Result(TokStart, CurPtr - TokStart);
APInt Value(128, 0, true);
- if (Result.getAsInteger(Radix, Value))
- return ReturnError(TokStart, !isHex ? "invalid decimal number" :
- "invalid hexdecimal number");
-
- // Consume the [hH].
- if (LexMasmIntegers && Radix == 16)
- ++CurPtr;
+ if (Result.getAsInteger(Radix, Value)) {
+ return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
+ }
// The darwin/x86 (and x86-64) assembler accepts and ignores type
// suffices on integer literals.
@@ -416,11 +498,9 @@ AsmToken AsmLexer::LexDigit() {
// Either octal or hexadecimal.
APInt Value(128, 0, true);
unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
- bool isHex = Radix == 16;
StringRef Result(TokStart, CurPtr - TokStart);
if (Result.getAsInteger(Radix, Value))
- return ReturnError(TokStart, !isHex ? "invalid octal number" :
- "invalid hexdecimal number");
+ return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
// Consume the [hH].
if (Radix == 16)
diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index 532ded038043..575e6ee265c8 100644
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -132,7 +132,6 @@ class COFFMasmParser : public MCAsmParserExtension {
// option
// popcontext
// pushcontext
- // .radix
// .safeseh
// Procedure directives
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index ca9b2df7cf23..cdefc062c7dc 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -732,6 +732,7 @@ class MasmParser : public MCAsmParser {
DK_SAVEREG,
DK_SAVEXMM128,
DK_SETFRAME,
+ DK_RADIX,
};
/// Maps directive name --> DirectiveKind enum, for directives parsed by this
@@ -964,6 +965,9 @@ class MasmParser : public MCAsmParser {
// ".erre" or ".errnz", depending on ExpectZero.
bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
+ // ".radix"
+ bool parseDirectiveRadix(SMLoc DirectiveLoc);
+
// "echo"
bool parseDirectiveEcho();
@@ -2284,6 +2288,8 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveErrorIfe(IDLoc, true);
case DK_ERRNZ:
return parseDirectiveErrorIfe(IDLoc, false);
+ case DK_RADIX:
+ return parseDirectiveRadix(IDLoc);
case DK_ECHO:
return parseDirectiveEcho();
}
@@ -6343,6 +6349,7 @@ void MasmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".savereg"] = DK_SAVEREG;
DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
DirectiveKindMap[".setframe"] = DK_SETFRAME;
+ DirectiveKindMap[".radix"] = DK_RADIX;
// DirectiveKindMap[".altmacro"] = DK_ALTMACRO;
// DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO;
DirectiveKindMap["db"] = DK_DB;
@@ -6584,6 +6591,22 @@ bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
return false;
}
+/// Handles the ".radix" directive.
+///
+/// Reads the rest of the statement, trims it, and parses it as a base-10
+/// unsigned integer. A value in the range 2 to 16 is installed as the lexer's
+/// default radix; anything else is reported at the location where the operand
+/// started.
+///
+/// \param DirectiveLoc location of the directive itself (not used here).
+/// \returns false on success, otherwise the result of Error().
+bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
+  // Capture the operand's location before consuming it so that diagnostics
+  // point at the operand.
+  const SMLoc OperandLoc = getLexer().getLoc();
+  const StringRef Operand = parseStringToEndOfStatement().trim();
+
+  unsigned NewRadix;
+  const bool Malformed = Operand.getAsInteger(10, NewRadix);
+  if (Malformed)
+    return Error(OperandLoc,
+                 "radix must be a decimal number in the range 2 to 16; was " +
+                     Operand);
+
+  const bool InRange = NewRadix >= 2 && NewRadix <= 16;
+  if (!InRange)
+    return Error(OperandLoc, "radix must be in the range 2 to 16; was " +
+                                 std::to_string(NewRadix));
+
+  getLexer().setDefaultRadix(NewRadix);
+  return false;
+}
+
bool MasmParser::parseDirectiveEcho() {
StringRef Message = parseStringToEndOfStatement();
Lex(); // eat end of statement
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 3270932a76d0..c07c291c74ee 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1662,6 +1662,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if ((Done = SM.isValidEndState()))
break;
return Error(Tok.getLoc(), "unknown token in expression");
+ case AsmToken::Error:
+ return Error(getLexer().getErrLoc(), getLexer().getErr());
+ break;
case AsmToken::EndOfStatement:
Done = true;
break;
diff --git a/llvm/test/tools/llvm-ml/radix.test b/llvm/test/tools/llvm-ml/radix.test
new file mode 100644
index 000000000000..64333706b07d
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/radix.test
@@ -0,0 +1,97 @@
+# RUN: llvm-ml -filetype=asm %s | FileCheck %s
+; Positive tests for MASM integer literals: explicit radix specifiers and
+; the .radix directive that changes the default radix.
+
+.code
+
+; With the default radix of 10, both 'b' and 'y' mark a binary literal.
+t1:
+mov eax, 100b
+mov eax, 100y
+
+; CHECK-LABEL: t1:
+; CHECK-NEXT: mov eax, 4
+; CHECK-NEXT: mov eax, 4
+
+; 'o' and 'q' both mark an octal literal.
+t2:
+mov eax, 100o
+mov eax, 100q
+
+; CHECK-LABEL: t2:
+; CHECK-NEXT: mov eax, 64
+; CHECK-NEXT: mov eax, 64
+
+; With the default radix of 10, both 'd' and 't' mark a decimal literal.
+t3:
+mov eax, 100d
+mov eax, 100t
+
+; CHECK-LABEL: t3:
+; CHECK-NEXT: mov eax, 100
+; CHECK-NEXT: mov eax, 100
+
+; 'h' marks a hexadecimal literal.
+t4:
+mov eax, 100h
+
+; CHECK-LABEL: t4:
+; CHECK-NEXT: mov eax, 256
+
+; A literal without a specifier is read in the current default radix, which
+; each .radix directive replaces: 10 (initial), then 2, 16, and 10 again.
+t5:
+mov eax, 100
+.radix 2
+mov eax, 100
+.radix 16
+mov eax, 100
+.radix 10
+mov eax, 100
+
+; CHECK-LABEL: t5:
+; CHECK: mov eax, 100
+; CHECK: mov eax, 4
+; CHECK: mov eax, 256
+; CHECK: mov eax, 100
+
+; A radix without a conventional name works too: 100 in base 9 is 81.
+t6:
+.radix 9
+mov eax, 100
+.radix 10
+
+; CHECK-LABEL: t6:
+; CHECK: mov eax, 81
+
+; In radix 12, 'b' is a digit (11) rather than a binary specifier, so 100b is
+; a base-12 number; 'y' still marks binary.
+t7:
+.radix 12
+mov eax, 100b
+mov eax, 100y
+.radix 10
+
+; CHECK-LABEL: t7:
+; CHECK: mov eax, 1739
+; CHECK: mov eax, 4
+
+; In radix 16, 'd' is a digit (13) rather than a decimal specifier, so 100d is
+; a hexadecimal number; 't' still marks decimal.
+t8:
+.radix 16
+mov eax, 100d
+mov eax, 100t
+.radix 10
+
+; CHECK-LABEL: t8:
+; CHECK: mov eax, 4109
+; CHECK: mov eax, 100
+
+; Same as t7/t8, but with other non-binary / non-decimal digits in the
+; literal: the trailing 'b' and 'd' are still read as digits.
+t9:
+.radix 12
+mov eax, 102b
+.radix 16
+mov eax, 10fd
+.radix 10
+
+; CHECK-LABEL: t9:
+; CHECK: mov eax, 1763
+; CHECK: mov eax, 4349
+
+; In radix 16, 1e1 is the integer 0x1e1; the 'e' is a digit here.
+t10:
+.radix 16
+mov eax, 1e1
+.radix 10
+
+; CHECK-LABEL: t10:
+; CHECK: mov eax, 481
+
+END
diff --git a/llvm/test/tools/llvm-ml/radix_errors.test b/llvm/test/tools/llvm-ml/radix_errors.test
new file mode 100644
index 000000000000..c3a327c4c60d
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/radix_errors.test
@@ -0,0 +1,60 @@
+; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s
+; Negative tests: literals containing a digit that is invalid for their
+; radix, and the wording of the diagnostic that results.
+
+.code
+
+; '2' is not a binary digit. With a trailing 'b' the literal is reported in
+; the default radix (decimal, then base-11); with a trailing 'y' it is
+; reported as binary under either default radix.
+t1:
+mov eax, 120b
+mov eax, 120y
+.radix 11
+mov eax, 120b
+mov eax, 120y
+.radix 10
+
+; CHECK: error: invalid decimal number
+; CHECK: error: invalid binary number
+; CHECK: error: invalid base-11 number
+; CHECK: error: invalid binary number
+
+; '9' is not an octal digit; 'o' and 'q' literals are reported as octal under
+; both radix 10 and radix 13.
+t2:
+mov eax, 190o
+mov eax, 190q
+.radix 13
+mov eax, 190o
+mov eax, 190q
+.radix 10
+
+; CHECK: error: invalid octal number
+; CHECK: error: invalid octal number
+; CHECK: error: invalid octal number
+; CHECK: error: invalid octal number
+
+; 'f' is not a decimal digit. With a trailing 'd' the literal is reported in
+; the default radix (decimal, then base-13); with a trailing 't' it is
+; reported as decimal under either default radix.
+t3:
+mov eax, 1f0d
+mov eax, 1f0t
+.radix 13
+mov eax, 1f0d
+mov eax, 1f0t
+.radix 10
+
+; CHECK: error: invalid decimal number
+; CHECK: error: invalid decimal number
+; CHECK: error: invalid base-13 number
+; CHECK: error: invalid decimal number
+
+; 'e' is not a decimal digit; switching to radix 16 and back to radix 10
+; leaves 10e invalid.
+t4:
+mov eax, 10e
+.radix 16
+.radix 10
+mov eax, 10e
+
+; CHECK: error: invalid decimal number
+; CHECK: error: invalid decimal number
+
+; 9 is not a valid digit in radix 9.
+t5:
+.radix 9
+mov eax, 9
+.radix 10
+
+; CHECK: error: invalid base-9 number
+
+END
More information about the llvm-commits
mailing list