[llvm] 3333c28 - [llvm-ml] Improve indirect call parsing

Eric Astor via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 28 10:17:40 PDT 2022


Author: Alan Zhao
Date: 2022-04-28T13:17:19-04:00
New Revision: 3333c28fc0dc0485e0e08b9d7039e0a501884b7a

URL: https://github.com/llvm/llvm-project/commit/3333c28fc0dc0485e0e08b9d7039e0a501884b7a
DIFF: https://github.com/llvm/llvm-project/commit/3333c28fc0dc0485e0e08b9d7039e0a501884b7a.diff

LOG: [llvm-ml] Improve indirect call parsing

In MASM, if a QWORD symbol is passed to a jmp or call instruction in
64-bit mode or a DWORD or WORD symbol is passed in 32-bit mode, then
MSVC's assembler recognizes that as an indirect call. Additionally, if
the operand is qualified as a ptr, then that should also be an indirect
call.

Furthermore, in 64-bit mode, such operands are implicitly rip-relative
(in fact, MSVC's assembler ml64.exe does not allow explicitly specifying
rip as a base register.)

To keep this patch managable, this patch does not include:
* error messages for wrong operand types (e.g. passing a QWORD in 32-bit
  mode)
* resolving indirect calls if the symbol is declared after it's first
  use (llvm-ml currently only runs a single pass).
* imlementing the extern keyword (required to resolve
  https://crbug.com/762167.)

This patch is likely missing a bunch of edge cases, so please do point
them out in the review.

Reviewed By: epastor, hans, MaskRay

Committed By: epastor (on behalf of ayzhao)

Differential Revision: https://reviews.llvm.org/D124413

Added: 
    llvm/test/tools/llvm-ml/indirect_branch.asm

Modified: 
    llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
    llvm/lib/Target/X86/AsmParser/X86Operand.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8008b5304012..31e4744ca9ec 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1107,9 +1107,9 @@ class X86AsmParser : public MCTargetAsmParser {
                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
   bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                OperandVector &FinalOperands);
-  bool ParseOperand(OperandVector &Operands);
-  bool ParseATTOperand(OperandVector &Operands);
-  bool ParseIntelOperand(OperandVector &Operands);
+  bool parseOperand(OperandVector &Operands, StringRef Name);
+  bool parseATTOperand(OperandVector &Operands);
+  bool parseIntelOperand(OperandVector &Operands, StringRef Name);
   bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
                                 InlineAsmIdentifierInfo &Info, SMLoc &End);
   bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
@@ -1736,11 +1736,11 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
   return false;
 }
 
-bool X86AsmParser::ParseOperand(OperandVector &Operands) {
+bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
   if (isParsingIntelSyntax())
-    return ParseIntelOperand(Operands);
+    return parseIntelOperand(Operands, Name);
 
-  return ParseATTOperand(Operands);
+  return parseATTOperand(Operands);
 }
 
 bool X86AsmParser::CreateMemForMSInlineAsm(
@@ -2513,7 +2513,7 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
   return false;
 }
 
-bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
+bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
   MCAsmParser &Parser = getParser();
   const AsmToken &Tok = Parser.getTok();
   SMLoc Start, End;
@@ -2635,25 +2635,49 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
 
   // When parsing x64 MS-style assembly, all non-absolute references to a named
   // variable default to RIP-relative.
-  if (Parser.isParsingMasm() && is64BitMode() && SM.getElementSize() > 0) {
-    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
-                                             BaseReg, IndexReg, Scale, Start,
-                                             End, Size,
-                                             /*DefaultBaseReg=*/X86::RIP));
-    return false;
+  unsigned DefaultBaseReg = X86::NoRegister;
+  bool MaybeDirectBranchDest = true;
+
+  if (Parser.isParsingMasm()) {
+    bool IsUnconditionalBranch =
+        Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
+    if (is64BitMode() && SM.getElementSize() > 0) {
+      DefaultBaseReg = X86::RIP;
+    }
+    if (IsUnconditionalBranch) {
+      if (PtrInOperand) {
+        MaybeDirectBranchDest = false;
+        if (is64BitMode())
+          DefaultBaseReg = X86::RIP;
+      } else if (!BaseReg && !IndexReg && Disp &&
+                 Disp->getKind() == MCExpr::SymbolRef) {
+        if (is64BitMode()) {
+          if (SM.getSize() == 8) {
+            MaybeDirectBranchDest = false;
+            DefaultBaseReg = X86::RIP;
+          }
+        } else {
+          if (SM.getSize() == 4 || SM.getSize() == 2)
+            MaybeDirectBranchDest = false;
+        }
+      }
+    }
   }
 
-  if ((BaseReg || IndexReg || RegNo))
-    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
-                                             BaseReg, IndexReg, Scale, Start,
-                                             End, Size));
+  if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
+    Operands.push_back(X86Operand::CreateMem(
+        getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
+        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
+        /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
   else
-    Operands.push_back(
-        X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size));
+    Operands.push_back(X86Operand::CreateMem(
+        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
+        /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
+        MaybeDirectBranchDest));
   return false;
 }
 
-bool X86AsmParser::ParseATTOperand(OperandVector &Operands) {
+bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
   MCAsmParser &Parser = getParser();
   switch (getLexer().getKind()) {
   case AsmToken::Dollar: {
@@ -3409,7 +3433,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 
     // Read the operands.
     while (true) {
-      if (ParseOperand(Operands))
+      if (parseOperand(Operands, Name))
         return true;
       if (HandleAVX512Operand(Operands))
         return true;

diff  --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 00f22190e7d0..075b800f9e20 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -72,6 +72,11 @@ struct X86Operand final : public MCParsedAsmOperand {
     /// If the memory operand is unsized and there are multiple instruction
     /// matches, prefer the one with this size.
     unsigned FrontendSize;
+
+    /// If false, then this operand must be a memory operand for an indirect
+    /// branch instruction. Otherwise, this operand may belong to either a
+    /// direct or indirect branch instruction.
+    bool MaybeDirectBranchDest;
   };
 
   union {
@@ -209,6 +214,10 @@ struct X86Operand final : public MCParsedAsmOperand {
     assert(Kind == Memory && "Invalid access!");
     return Mem.FrontendSize;
   }
+  bool isMaybeDirectBranchDest() const {
+    assert(Kind == Memory && "Invalid access!");
+    return Mem.MaybeDirectBranchDest;
+  }
 
   bool isToken() const override {return Kind == Token; }
 
@@ -374,8 +383,9 @@ struct X86Operand final : public MCParsedAsmOperand {
 
   bool isAbsMem() const {
     return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
-      !getMemIndexReg() && getMemScale() == 1;
+           !getMemIndexReg() && getMemScale() == 1 && isMaybeDirectBranchDest();
   }
+
   bool isAVX512RC() const{
       return isImm();
   }
@@ -672,7 +682,7 @@ struct X86Operand final : public MCParsedAsmOperand {
   CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
             unsigned Size = 0, StringRef SymName = StringRef(),
             void *OpDecl = nullptr, unsigned FrontendSize = 0,
-            bool UseUpRegs = false) {
+            bool UseUpRegs = false, bool MaybeDirectBranchDest = true) {
     auto Res = std::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
     Res->Mem.SegReg   = 0;
     Res->Mem.Disp     = Disp;
@@ -683,6 +693,7 @@ struct X86Operand final : public MCParsedAsmOperand {
     Res->Mem.Size     = Size;
     Res->Mem.ModeSize = ModeSize;
     Res->Mem.FrontendSize = FrontendSize;
+    Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest;
     Res->UseUpRegs = UseUpRegs;
     Res->SymName      = SymName;
     Res->OpDecl       = OpDecl;
@@ -697,7 +708,8 @@ struct X86Operand final : public MCParsedAsmOperand {
             SMLoc EndLoc, unsigned Size = 0,
             unsigned DefaultBaseReg = X86::NoRegister,
             StringRef SymName = StringRef(), void *OpDecl = nullptr,
-            unsigned FrontendSize = 0, bool UseUpRegs = false) {
+            unsigned FrontendSize = 0, bool UseUpRegs = false,
+            bool MaybeDirectBranchDest = true) {
     // We should never just have a displacement, that should be parsed as an
     // absolute memory operand.
     assert((SegReg || BaseReg || IndexReg || DefaultBaseReg) &&
@@ -716,6 +728,7 @@ struct X86Operand final : public MCParsedAsmOperand {
     Res->Mem.Size     = Size;
     Res->Mem.ModeSize = ModeSize;
     Res->Mem.FrontendSize = FrontendSize;
+    Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest;
     Res->UseUpRegs = UseUpRegs;
     Res->SymName      = SymName;
     Res->OpDecl       = OpDecl;

diff  --git a/llvm/test/tools/llvm-ml/indirect_branch.asm b/llvm/test/tools/llvm-ml/indirect_branch.asm
new file mode 100644
index 000000000000..c4b28ba0fad9
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/indirect_branch.asm
@@ -0,0 +1,188 @@
+; RUN: llvm-ml -m64 -filetype=s %s /Fo - | FileCheck %s --check-prefixes=CHECK-64,CHECK
+; RUN: llvm-ml -m32 -filetype=s %s /Fo - | FileCheck %s --check-prefixes=CHECK-32,CHECK
+
+.data
+
+ifdef rax
+  fn_ref qword 1
+else
+  fn_ref dword 1
+endif
+
+fn_ref_word word 2
+fn PROC
+
+BranchTargetStruc struc
+  member0 dword ?
+  ifdef rax
+    member1 dword ?
+  endif
+BranchTargetStruc ends
+
+
+ifdef rax
+  fn_ref_struc BranchTargetStruc {3, 3}
+else
+  fn_ref_struc BranchTargetStruc {3}
+endif
+
+.code
+
+t0:
+call fn_ref
+jmp fn_ref
+; CHECK-LABEL: t0:
+; CHECK-64: call qword ptr [rip + fn_ref]
+; CHECK-64: jmp qword ptr [rip + fn_ref]
+; CHECK-32: call dword ptr [fn_ref]
+; CHECK-32: jmp dword ptr [fn_ref]
+
+t1:
+call [fn_ref]
+jmp [fn_ref]
+; CHECK-LABEL: t1:
+; CHECK-64: call qword ptr [rip + fn_ref]
+; CHECK-64: jmp qword ptr [rip + fn_ref]
+; CHECK-32: call dword ptr [fn_ref]
+; CHECK-32: jmp dword ptr [fn_ref]
+
+ifdef rax
+  t2:
+  call qword ptr [fn_ref]
+  jmp qword ptr [fn_ref]
+  ; CHECK-64-LABEL: t2:
+  ; CHECK-64: call qword ptr [rip + fn_ref]
+  ; CHECK-64: jmp qword ptr [rip + fn_ref]
+else
+  t2:
+  call dword ptr [fn_ref]
+  jmp dword ptr [fn_ref]
+  ; CHECK-32-LABEL: t2:
+  ; CHECK-32: call dword ptr [fn_ref]
+  ; CHECK-32: jmp dword ptr [fn_ref]
+
+  t3:
+  call fn_ref_word
+  jmp fn_ref_word
+  ; CHECK-32-LABEL: t3:
+  ; CHECK-32: call word ptr [fn_ref_word]
+  ; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
+
+  t4:
+  call [fn_ref_word]
+  jmp [fn_ref_word]
+  ; CHECK-32-LABEL: t4:
+  ; CHECK-32: call word ptr [fn_ref_word]
+  ; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
+
+  t5:
+  call word ptr [fn_ref_word]
+  jmp word ptr [fn_ref_word]
+  ; CHECK-32-LABEL: t5:
+  ; CHECK-32: call word ptr [fn_ref_word]
+  ; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
+endif
+
+t6:
+call t6
+jmp t6
+; CHECK-LABEL: t6:
+; CHECK: call t6
+; CHECK-NEXT: jmp t6
+
+t7:
+call [t7]
+jmp [t7]
+; CHECK-LABEL: t7:
+; CHECK: call t7
+; CHECK-NEXT: jmp t7
+
+ifdef rax
+  t8:
+  call qword ptr [t8]
+  jmp qword ptr [t8]
+  ; CHECK-64-LABEL: t8:
+  ; CHECK-64: call qword ptr [rip + t8]
+  ; CHECK-64-NEXT: jmp qword ptr [rip + t8]
+else
+  t8:
+  call dword ptr [t8]
+  jmp dword ptr [t8]
+  ; CHECK-32-LABEL: t8:
+  ; CHECK-32: call dword ptr [t8]
+  ; CHECK-32-NEXT: jmp dword ptr [t8]
+endif
+
+t9:
+call fn
+jmp fn
+; CHECK-LABEL: t9:
+; CHECK: call fn
+; CHECK-NEXT: jmp fn
+
+t10:
+call [fn]
+jmp [fn]
+; CHECK-LABEL: t10:
+; CHECK: call fn
+; CHECK-NEXT: jmp fn
+
+ifdef rax
+  t11:
+  call qword ptr [fn]
+  jmp qword ptr [fn]
+  ; CHECK-64-LABEL: t11:
+  ; CHECK-64: call qword ptr [rip + fn]
+  ; CHECK-64-NEXT: jmp qword ptr [rip + fn]
+else
+  t11:
+  call dword ptr [fn]
+  jmp dword ptr [fn]
+  ; CHECK-32-LABEL: t11:
+  ; CHECK-32: call dword ptr [fn]
+  ; CHECK-32-NEXT: jmp dword ptr [fn]
+endif
+
+t12:
+call fn_ref_struc
+jmp fn_ref_struc
+; CHECK-LABEL: t12:
+; CHECK-64: call qword ptr [rip + fn_ref_struc]
+; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
+; CHECK-32: call dword ptr [fn_ref_struc]
+; CHECK-32: jmp dword ptr [fn_ref_struc]
+
+t13:
+call [fn_ref_struc]
+jmp [fn_ref_struc]
+; CHECK-LABEL: t13:
+; CHECK-64: call qword ptr [rip + fn_ref_struc]
+; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
+; CHECK-32: call dword ptr [fn_ref_struc]
+; CHECK-32: jmp dword ptr [fn_ref_struc]
+
+ifdef rax
+  t14:
+  call qword ptr [fn_ref_struc]
+  jmp qword ptr [fn_ref_struc]
+  ; CHECK-64-LABEL: t14:
+  ; CHECK-64: call qword ptr [rip + fn_ref_struc]
+  ; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
+else
+  t14:
+  call dword ptr [fn_ref_struc]
+  jmp dword ptr [fn_ref_struc]
+  ; CHECK-32-LABEL: t14:
+  ; CHECK-32: call dword ptr [fn_ref_struc]
+  ; CHECK-32: jmp dword ptr [fn_ref_struc]
+endif
+
+t15:
+je t15
+; CHECK-LABEL: t15:
+; CHECK: je t15
+
+t16:
+je [t16];
+; CHECK-LABEL: t16:
+; CHECK: je t16


        


More information about the llvm-commits mailing list