[llvm] r345189 - [MC] Separate masm integer literal lexer support from inline asm
Reid Kleckner via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 24 13:23:57 PDT 2018
Author: rnk
Date: Wed Oct 24 13:23:57 2018
New Revision: 345189
URL: http://llvm.org/viewvc/llvm-project?rev=345189&view=rev
Log:
[MC] Separate masm integer literal lexer support from inline asm
Summary:
This renames the IsParsingMSInlineAsm member variable of AsmLexer to
LexMasmIntegers and moves it up to MCAsmLexer. This is the only behavior
controlled by that variable. I added a public setter, so that it can be
set from outside or from the llvm-mc command line. We may need to
arrange things so that users can get this behavior from clang, but
that's future work.
I also put additional hex literal lexing functionality under this flag
to fix PR32973. It appears that this hex literal parsing wasn't intended
to be enabled in non-masm-style blocks.
Now, masm integers (0b1101 and 0ABCh) work in __asm blocks from clang,
while 0b label references work when using .intel_syntax in standalone .s
files.
However, 0b label references will *not* work from __asm blocks in clang.
They will work from GCC inline asm blocks, which it sounds like is
important for Crypto++ as mentioned in PR36144.
Essentially, we only lex masm literals for inline asm blobs that use
intel syntax. If the .intel_syntax directive is used inside a gnu-style
inline asm statement, masm literals will not be lexed, which is
compatible with gas and llvm-mc standalone .s assembly.
This fixes PR36144 and PR32973.
Reviewers: Gerolf, avt77
Subscribers: eraman, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D53535
Added:
llvm/trunk/test/MC/AArch64/macro-hex-int.s
Modified:
llvm/trunk/include/llvm/MC/MCParser/AsmLexer.h
llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h
llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
llvm/trunk/lib/MC/MCParser/AsmLexer.cpp
llvm/trunk/lib/MC/MCParser/AsmParser.cpp
llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
llvm/trunk/test/MC/X86/intel-syntax-hex.s
llvm/trunk/test/MC/X86/pr27884.s
llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s
llvm/trunk/tools/llvm-mc/llvm-mc.cpp
Modified: llvm/trunk/include/llvm/MC/MCParser/AsmLexer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCParser/AsmLexer.h?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCParser/AsmLexer.h (original)
+++ llvm/trunk/include/llvm/MC/MCParser/AsmLexer.h Wed Oct 24 13:23:57 2018
@@ -30,7 +30,6 @@ class AsmLexer : public MCAsmLexer {
StringRef CurBuf;
bool IsAtStartOfLine = true;
bool IsAtStartOfStatement = true;
- bool IsParsingMSInlineAsm = false;
bool IsPeeking = false;
protected:
@@ -44,7 +43,6 @@ public:
~AsmLexer() override;
void setBuffer(StringRef Buf, const char *ptr = nullptr);
- void setParsingMSInlineAsm(bool V) { IsParsingMSInlineAsm = V; }
StringRef LexUntilEndOfStatement() override;
Modified: llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h (original)
+++ llvm/trunk/include/llvm/MC/MCParser/MCAsmLexer.h Wed Oct 24 13:23:57 2018
@@ -50,6 +50,7 @@ protected: // Can only create subclasses
bool SkipSpace = true;
bool AllowAtInIdentifier;
bool IsAtStartOfStatement = true;
+ bool LexMasmIntegers = false;
AsmCommentConsumer *CommentConsumer = nullptr;
MCAsmLexer();
@@ -146,6 +147,10 @@ public:
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
this->CommentConsumer = CommentConsumer;
}
+
+ /// Set whether to lex masm-style binary and hex literals. They look like
+ /// 0b1101 and 0ABCh respectively.
+ void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
};
} // end namespace llvm
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Wed Oct 24 13:23:57 2018
@@ -156,9 +156,10 @@ void AsmPrinter::EmitInlineAsm(StringRef
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
+ // Enable lexing Masm binary and hex integer literals in intel inline
+ // assembly.
if (Dialect == InlineAsm::AD_Intel)
- // We need this flag to be able to parse numbers like "0bH"
- Parser->setParsingInlineAsm(true);
+ Parser->getLexer().setLexMasmIntegers(true);
if (MF) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
Modified: llvm/trunk/lib/MC/MCParser/AsmLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmLexer.cpp?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCParser/AsmLexer.cpp (original)
+++ llvm/trunk/lib/MC/MCParser/AsmLexer.cpp Wed Oct 24 13:23:57 2018
@@ -243,22 +243,26 @@ static void SkipIgnoredIntegerSuffix(con
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
// integer as a hexadecimal, possibly with leading zeroes.
-static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
- const char *FirstHex = nullptr;
+static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
+ bool LexHex) {
+ const char *FirstNonDec = nullptr;
const char *LookAhead = CurPtr;
while (true) {
if (isDigit(*LookAhead)) {
++LookAhead;
- } else if (isHexDigit(*LookAhead)) {
- if (!FirstHex)
- FirstHex = LookAhead;
- ++LookAhead;
} else {
- break;
+ if (!FirstNonDec)
+ FirstNonDec = LookAhead;
+
+ // Keep going if we are looking for a 'h' suffix.
+ if (LexHex && isHexDigit(*LookAhead))
+ ++LookAhead;
+ else
+ break;
}
}
- bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
- CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
+ bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
+ CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
if (isHex)
return 16;
return DefaultRadix;
@@ -281,7 +285,7 @@ static AsmToken intToken(StringRef Ref,
AsmToken AsmLexer::LexDigit() {
// MASM-flavor binary integer: [01]+[bB]
// MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
- if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
+ if (LexMasmIntegers && isdigit(CurPtr[-1])) {
const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
CurPtr - 1 : nullptr;
const char *OldCurPtr = CurPtr;
@@ -320,7 +324,7 @@ AsmToken AsmLexer::LexDigit() {
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
- unsigned Radix = doLookAhead(CurPtr, 10);
+ unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
bool isHex = Radix == 16;
// Check for floating point literals.
if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
@@ -335,8 +339,8 @@ AsmToken AsmLexer::LexDigit() {
return ReturnError(TokStart, !isHex ? "invalid decimal number" :
"invalid hexdecimal number");
- // Consume the [bB][hH].
- if (Radix == 2 || Radix == 16)
+ // Consume the [hH].
+ if (LexMasmIntegers && Radix == 16)
++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores type
@@ -346,7 +350,7 @@ AsmToken AsmLexer::LexDigit() {
return intToken(Result, Value);
}
- if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
+ if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
++CurPtr;
// See if we actually have "0b" as part of something like "jmp 0b\n"
if (!isDigit(CurPtr[0])) {
@@ -395,7 +399,7 @@ AsmToken AsmLexer::LexDigit() {
return ReturnError(TokStart, "invalid hexadecimal number");
// Consume the optional [hH].
- if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
+ if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
@@ -407,7 +411,7 @@ AsmToken AsmLexer::LexDigit() {
// Either octal or hexadecimal.
APInt Value(128, 0, true);
- unsigned Radix = doLookAhead(CurPtr, 8);
+ unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
bool isHex = Radix == 16;
StringRef Result(TokStart, CurPtr - TokStart);
if (Result.getAsInteger(Radix, Value))
Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmParser.cpp?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCParser/AsmParser.cpp (original)
+++ llvm/trunk/lib/MC/MCParser/AsmParser.cpp Wed Oct 24 13:23:57 2018
@@ -229,7 +229,9 @@ public:
void setParsingInlineAsm(bool V) override {
ParsingInlineAsm = V;
- Lexer.setParsingMSInlineAsm(V);
+ // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
+ // hex integer literals.
+ Lexer.setLexMasmIntegers(V);
}
bool isParsingInlineAsm() override { return ParsingInlineAsm; }
Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original)
+++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Wed Oct 24 13:23:57 2018
@@ -3283,7 +3283,6 @@ bool X86AsmParser::ParseDirective(AsmTok
if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
- getParser().setParsingInlineAsm(false);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "prefix")
Parser.Lex();
@@ -3296,7 +3295,6 @@ bool X86AsmParser::ParseDirective(AsmTok
return false;
} else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
- getParser().setParsingInlineAsm(true);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "noprefix")
Parser.Lex();
Added: llvm/trunk/test/MC/AArch64/macro-hex-int.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AArch64/macro-hex-int.s?rev=345189&view=auto
==============================================================================
--- llvm/trunk/test/MC/AArch64/macro-hex-int.s (added)
+++ llvm/trunk/test/MC/AArch64/macro-hex-int.s Wed Oct 24 13:23:57 2018
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -triple aarch64-elf -filetype=obj %s -o - | llvm-objdump -d -r - | FileCheck %s
+
+.macro do_add sz
+ add v0.\sz, v0.\sz, v0.\sz
+.endm
+
+do_add 8h
+// CHECK: add v0.8h, v0.8h, v0.8h
Modified: llvm/trunk/test/MC/X86/intel-syntax-hex.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/intel-syntax-hex.s?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/intel-syntax-hex.s (original)
+++ llvm/trunk/test/MC/X86/intel-syntax-hex.s Wed Oct 24 13:23:57 2018
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
+// RUN: llvm-mc -masm-integers -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
// rdar://12470373
// Checks to make sure we parse the hexadecimal suffix properly.
Modified: llvm/trunk/test/MC/X86/pr27884.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/pr27884.s?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/pr27884.s (original)
+++ llvm/trunk/test/MC/X86/pr27884.s Wed Oct 24 13:23:57 2018
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown %s -masm-integers=1
.intel_syntax
add rbx, 0B0h
Modified: llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/intel-syntax.s Wed Oct 24 13:23:57 2018
@@ -5,7 +5,7 @@
.intel_syntax noprefix
mov eax, 1
- mov ebx, 0ffh
+ mov ebx, 0xff
imul esi, edi
lea eax, [rsi + rdi]
Modified: llvm/trunk/tools/llvm-mc/llvm-mc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/llvm-mc.cpp?rev=345189&r1=345188&r2=345189&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mc/llvm-mc.cpp (original)
+++ llvm/trunk/tools/llvm-mc/llvm-mc.cpp Wed Oct 24 13:23:57 2018
@@ -164,6 +164,10 @@ MainFileName("main-file-name",
static cl::opt<bool> SaveTempLabels("save-temp-labels",
cl::desc("Don't discard temporary labels"));
+static cl::opt<bool> LexMasmIntegers(
+ "masm-integers",
+ cl::desc("Enable binary and hex masm integers (0b110 and 0ABCh)"));
+
static cl::opt<bool> NoExecStack("no-exec-stack",
cl::desc("File doesn't need an exec stack"));
@@ -293,6 +297,7 @@ static int AssembleInput(const char *Pro
return SymbolResult;
Parser->setShowParsedOperands(ShowInstOperands);
Parser->setTargetParser(*TAP);
+ Parser->getLexer().setLexMasmIntegers(LexMasmIntegers);
int Res = Parser->Run(NoInitialTextSection);
More information about the llvm-commits mailing list