[llvm] 3333c28 - [llvm-ml] Improve indirect call parsing
Eric Astor via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 28 10:17:40 PDT 2022
Author: Alan Zhao
Date: 2022-04-28T13:17:19-04:00
New Revision: 3333c28fc0dc0485e0e08b9d7039e0a501884b7a
URL: https://github.com/llvm/llvm-project/commit/3333c28fc0dc0485e0e08b9d7039e0a501884b7a
DIFF: https://github.com/llvm/llvm-project/commit/3333c28fc0dc0485e0e08b9d7039e0a501884b7a.diff
LOG: [llvm-ml] Improve indirect call parsing
In MASM, if a QWORD symbol is passed to a jmp or call instruction in
64-bit mode or a DWORD or WORD symbol is passed in 32-bit mode, then
MSVC's assembler recognizes that as an indirect call. Additionally, if
the operand is qualified as a ptr, then that should also be an indirect
call.
Furthermore, in 64-bit mode, such operands are implicitly rip-relative
(in fact, MSVC's assembler ml64.exe does not allow explicitly specifying
rip as a base register.)
To keep this patch managable, this patch does not include:
* error messages for wrong operand types (e.g. passing a QWORD in 32-bit
mode)
* resolving indirect calls if the symbol is declared after it's first
use (llvm-ml currently only runs a single pass).
* imlementing the extern keyword (required to resolve
https://crbug.com/762167.)
This patch is likely missing a bunch of edge cases, so please do point
them out in the review.
Reviewed By: epastor, hans, MaskRay
Committed By: epastor (on behalf of ayzhao)
Differential Revision: https://reviews.llvm.org/D124413
Added:
llvm/test/tools/llvm-ml/indirect_branch.asm
Modified:
llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
llvm/lib/Target/X86/AsmParser/X86Operand.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8008b5304012..31e4744ca9ec 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1107,9 +1107,9 @@ class X86AsmParser : public MCTargetAsmParser {
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
OperandVector &FinalOperands);
- bool ParseOperand(OperandVector &Operands);
- bool ParseATTOperand(OperandVector &Operands);
- bool ParseIntelOperand(OperandVector &Operands);
+ bool parseOperand(OperandVector &Operands, StringRef Name);
+ bool parseATTOperand(OperandVector &Operands);
+ bool parseIntelOperand(OperandVector &Operands, StringRef Name);
bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
InlineAsmIdentifierInfo &Info, SMLoc &End);
bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
@@ -1736,11 +1736,11 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
return false;
}
-bool X86AsmParser::ParseOperand(OperandVector &Operands) {
+bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
if (isParsingIntelSyntax())
- return ParseIntelOperand(Operands);
+ return parseIntelOperand(Operands, Name);
- return ParseATTOperand(Operands);
+ return parseATTOperand(Operands);
}
bool X86AsmParser::CreateMemForMSInlineAsm(
@@ -2513,7 +2513,7 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
return false;
}
-bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
+bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
@@ -2635,25 +2635,49 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
// When parsing x64 MS-style assembly, all non-absolute references to a named
// variable default to RIP-relative.
- if (Parser.isParsingMasm() && is64BitMode() && SM.getElementSize() > 0) {
- Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
- BaseReg, IndexReg, Scale, Start,
- End, Size,
- /*DefaultBaseReg=*/X86::RIP));
- return false;
+ unsigned DefaultBaseReg = X86::NoRegister;
+ bool MaybeDirectBranchDest = true;
+
+ if (Parser.isParsingMasm()) {
+ bool IsUnconditionalBranch =
+ Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
+ if (is64BitMode() && SM.getElementSize() > 0) {
+ DefaultBaseReg = X86::RIP;
+ }
+ if (IsUnconditionalBranch) {
+ if (PtrInOperand) {
+ MaybeDirectBranchDest = false;
+ if (is64BitMode())
+ DefaultBaseReg = X86::RIP;
+ } else if (!BaseReg && !IndexReg && Disp &&
+ Disp->getKind() == MCExpr::SymbolRef) {
+ if (is64BitMode()) {
+ if (SM.getSize() == 8) {
+ MaybeDirectBranchDest = false;
+ DefaultBaseReg = X86::RIP;
+ }
+ } else {
+ if (SM.getSize() == 4 || SM.getSize() == 2)
+ MaybeDirectBranchDest = false;
+ }
+ }
+ }
}
- if ((BaseReg || IndexReg || RegNo))
- Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
- BaseReg, IndexReg, Scale, Start,
- End, Size));
+ if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
+ Operands.push_back(X86Operand::CreateMem(
+ getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
+ Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
+ /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
else
- Operands.push_back(
- X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size));
+ Operands.push_back(X86Operand::CreateMem(
+ getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
+ /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
+ MaybeDirectBranchDest));
return false;
}
-bool X86AsmParser::ParseATTOperand(OperandVector &Operands) {
+bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
case AsmToken::Dollar: {
@@ -3409,7 +3433,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Read the operands.
while (true) {
- if (ParseOperand(Operands))
+ if (parseOperand(Operands, Name))
return true;
if (HandleAVX512Operand(Operands))
return true;
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 00f22190e7d0..075b800f9e20 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -72,6 +72,11 @@ struct X86Operand final : public MCParsedAsmOperand {
/// If the memory operand is unsized and there are multiple instruction
/// matches, prefer the one with this size.
unsigned FrontendSize;
+
+ /// If false, then this operand must be a memory operand for an indirect
+ /// branch instruction. Otherwise, this operand may belong to either a
+ /// direct or indirect branch instruction.
+ bool MaybeDirectBranchDest;
};
union {
@@ -209,6 +214,10 @@ struct X86Operand final : public MCParsedAsmOperand {
assert(Kind == Memory && "Invalid access!");
return Mem.FrontendSize;
}
+ bool isMaybeDirectBranchDest() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.MaybeDirectBranchDest;
+ }
bool isToken() const override {return Kind == Token; }
@@ -374,8 +383,9 @@ struct X86Operand final : public MCParsedAsmOperand {
bool isAbsMem() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
- !getMemIndexReg() && getMemScale() == 1;
+ !getMemIndexReg() && getMemScale() == 1 && isMaybeDirectBranchDest();
}
+
bool isAVX512RC() const{
return isImm();
}
@@ -672,7 +682,7 @@ struct X86Operand final : public MCParsedAsmOperand {
CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
unsigned Size = 0, StringRef SymName = StringRef(),
void *OpDecl = nullptr, unsigned FrontendSize = 0,
- bool UseUpRegs = false) {
+ bool UseUpRegs = false, bool MaybeDirectBranchDest = true) {
auto Res = std::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -683,6 +693,7 @@ struct X86Operand final : public MCParsedAsmOperand {
Res->Mem.Size = Size;
Res->Mem.ModeSize = ModeSize;
Res->Mem.FrontendSize = FrontendSize;
+ Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest;
Res->UseUpRegs = UseUpRegs;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
@@ -697,7 +708,8 @@ struct X86Operand final : public MCParsedAsmOperand {
SMLoc EndLoc, unsigned Size = 0,
unsigned DefaultBaseReg = X86::NoRegister,
StringRef SymName = StringRef(), void *OpDecl = nullptr,
- unsigned FrontendSize = 0, bool UseUpRegs = false) {
+ unsigned FrontendSize = 0, bool UseUpRegs = false,
+ bool MaybeDirectBranchDest = true) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg || DefaultBaseReg) &&
@@ -716,6 +728,7 @@ struct X86Operand final : public MCParsedAsmOperand {
Res->Mem.Size = Size;
Res->Mem.ModeSize = ModeSize;
Res->Mem.FrontendSize = FrontendSize;
+ Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest;
Res->UseUpRegs = UseUpRegs;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
diff --git a/llvm/test/tools/llvm-ml/indirect_branch.asm b/llvm/test/tools/llvm-ml/indirect_branch.asm
new file mode 100644
index 000000000000..c4b28ba0fad9
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/indirect_branch.asm
@@ -0,0 +1,188 @@
+; RUN: llvm-ml -m64 -filetype=s %s /Fo - | FileCheck %s --check-prefixes=CHECK-64,CHECK
+; RUN: llvm-ml -m32 -filetype=s %s /Fo - | FileCheck %s --check-prefixes=CHECK-32,CHECK
+
+.data
+
+ifdef rax
+ fn_ref qword 1
+else
+ fn_ref dword 1
+endif
+
+fn_ref_word word 2
+fn PROC
+
+BranchTargetStruc struc
+ member0 dword ?
+ ifdef rax
+ member1 dword ?
+ endif
+BranchTargetStruc ends
+
+
+ifdef rax
+ fn_ref_struc BranchTargetStruc {3, 3}
+else
+ fn_ref_struc BranchTargetStruc {3}
+endif
+
+.code
+
+t0:
+call fn_ref
+jmp fn_ref
+; CHECK-LABEL: t0:
+; CHECK-64: call qword ptr [rip + fn_ref]
+; CHECK-64: jmp qword ptr [rip + fn_ref]
+; CHECK-32: call dword ptr [fn_ref]
+; CHECK-32: jmp dword ptr [fn_ref]
+
+t1:
+call [fn_ref]
+jmp [fn_ref]
+; CHECK-LABEL: t1:
+; CHECK-64: call qword ptr [rip + fn_ref]
+; CHECK-64: jmp qword ptr [rip + fn_ref]
+; CHECK-32: call dword ptr [fn_ref]
+; CHECK-32: jmp dword ptr [fn_ref]
+
+ifdef rax
+ t2:
+ call qword ptr [fn_ref]
+ jmp qword ptr [fn_ref]
+ ; CHECK-64-LABEL: t2:
+ ; CHECK-64: call qword ptr [rip + fn_ref]
+ ; CHECK-64: jmp qword ptr [rip + fn_ref]
+else
+ t2:
+ call dword ptr [fn_ref]
+ jmp dword ptr [fn_ref]
+ ; CHECK-32-LABEL: t2:
+ ; CHECK-32: call dword ptr [fn_ref]
+ ; CHECK-32: jmp dword ptr [fn_ref]
+
+ t3:
+ call fn_ref_word
+ jmp fn_ref_word
+ ; CHECK-32-LABEL: t3:
+ ; CHECK-32: call word ptr [fn_ref_word]
+ ; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
+
+ t4:
+ call [fn_ref_word]
+ jmp [fn_ref_word]
+ ; CHECK-32-LABEL: t4:
+ ; CHECK-32: call word ptr [fn_ref_word]
+ ; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
+
+ t5:
+ call word ptr [fn_ref_word]
+ jmp word ptr [fn_ref_word]
+ ; CHECK-32-LABEL: t5:
+ ; CHECK-32: call word ptr [fn_ref_word]
+ ; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
+endif
+
+t6:
+call t6
+jmp t6
+; CHECK-LABEL: t6:
+; CHECK: call t6
+; CHECK-NEXT: jmp t6
+
+t7:
+call [t7]
+jmp [t7]
+; CHECK-LABEL: t7:
+; CHECK: call t7
+; CHECK-NEXT: jmp t7
+
+ifdef rax
+ t8:
+ call qword ptr [t8]
+ jmp qword ptr [t8]
+ ; CHECK-64-LABEL: t8:
+ ; CHECK-64: call qword ptr [rip + t8]
+ ; CHECK-64-NEXT: jmp qword ptr [rip + t8]
+else
+ t8:
+ call dword ptr [t8]
+ jmp dword ptr [t8]
+ ; CHECK-32-LABEL: t8:
+ ; CHECK-32: call dword ptr [t8]
+ ; CHECK-32-NEXT: jmp dword ptr [t8]
+endif
+
+t9:
+call fn
+jmp fn
+; CHECK-LABEL: t9:
+; CHECK: call fn
+; CHECK-NEXT: jmp fn
+
+t10:
+call [fn]
+jmp [fn]
+; CHECK-LABEL: t10:
+; CHECK: call fn
+; CHECK-NEXT: jmp fn
+
+ifdef rax
+ t11:
+ call qword ptr [fn]
+ jmp qword ptr [fn]
+ ; CHECK-64-LABEL: t11:
+ ; CHECK-64: call qword ptr [rip + fn]
+ ; CHECK-64-NEXT: jmp qword ptr [rip + fn]
+else
+ t11:
+ call dword ptr [fn]
+ jmp dword ptr [fn]
+ ; CHECK-32-LABEL: t11:
+ ; CHECK-32: call dword ptr [fn]
+ ; CHECK-32-NEXT: jmp dword ptr [fn]
+endif
+
+t12:
+call fn_ref_struc
+jmp fn_ref_struc
+; CHECK-LABEL: t12:
+; CHECK-64: call qword ptr [rip + fn_ref_struc]
+; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
+; CHECK-32: call dword ptr [fn_ref_struc]
+; CHECK-32: jmp dword ptr [fn_ref_struc]
+
+t13:
+call [fn_ref_struc]
+jmp [fn_ref_struc]
+; CHECK-LABEL: t13:
+; CHECK-64: call qword ptr [rip + fn_ref_struc]
+; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
+; CHECK-32: call dword ptr [fn_ref_struc]
+; CHECK-32: jmp dword ptr [fn_ref_struc]
+
+ifdef rax
+ t14:
+ call qword ptr [fn_ref_struc]
+ jmp qword ptr [fn_ref_struc]
+ ; CHECK-64-LABEL: t14:
+ ; CHECK-64: call qword ptr [rip + fn_ref_struc]
+ ; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
+else
+ t14:
+ call dword ptr [fn_ref_struc]
+ jmp dword ptr [fn_ref_struc]
+ ; CHECK-32-LABEL: t14:
+ ; CHECK-32: call dword ptr [fn_ref_struc]
+ ; CHECK-32: jmp dword ptr [fn_ref_struc]
+endif
+
+t15:
+je t15
+; CHECK-LABEL: t15:
+; CHECK: je t15
+
+t16:
+je [t16];
+; CHECK-LABEL: t16:
+; CHECK: je t16
More information about the llvm-commits
mailing list