[llvm] 9b4f747 - [ms] [llvm-ml] Implement support for PROC NEAR/FAR (#131707)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 2 15:09:46 PDT 2025


Author: Eric Astor
Date: 2025-05-02T18:09:43-04:00
New Revision: 9b4f747827489c5969ff903c728e65c545f58eaa

URL: https://github.com/llvm/llvm-project/commit/9b4f747827489c5969ff903c728e65c545f58eaa
DIFF: https://github.com/llvm/llvm-project/commit/9b4f747827489c5969ff903c728e65c545f58eaa.diff

LOG: [ms] [llvm-ml] Implement support for PROC NEAR/FAR (#131707)

Matches ML.EXE by translating "ret" instructions inside a `PROC FAR` to "retf", and automatically prepending a `push cs` to all near calls to a `PROC FAR`.

Added: 
    llvm/include/llvm/MC/MCParser/MCMasmParser.h
    llvm/test/tools/llvm-ml/proc_distance.asm

Modified: 
    llvm/include/llvm/MC/MCSymbolCOFF.h
    llvm/lib/MC/MCParser/COFFMasmParser.cpp
    llvm/lib/MC/MCParser/MasmParser.cpp
    llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCParser/MCMasmParser.h b/llvm/include/llvm/MC/MCParser/MCMasmParser.h
new file mode 100644
index 0000000000000..a34c6eba8bc59
--- /dev/null
+++ b/llvm/include/llvm/MC/MCParser/MCMasmParser.h
@@ -0,0 +1,29 @@
+//===- llvm/MC/MCParser/MCMasmParser.h - MASM Parser Interface --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCPARSER_MCMASMPARSER_H
+#define LLVM_MC_MCPARSER_MCMASMPARSER_H
+
+#include "llvm/MC/MCParser/MCAsmParser.h"
+
+namespace llvm {
+
+/// MASM-type assembler parser interface; exposes the default `ret` distance.
+class MCMasmParser : public MCAsmParser {
+public:
+  virtual bool getDefaultRetIsFar() const = 0;
+  virtual void setDefaultRetIsFar(bool IsFar) = 0;
+  // Every MCMasmParser reports MASM mode; classof() below keys off this.
+  bool isParsingMasm() const override { return true; }
+  // LLVM-style RTTI: an MCAsmParser in MASM mode is treated as MCMasmParser.
+  static bool classof(const MCAsmParser *AP) { return AP->isParsingMasm(); }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCPARSER_MCMASMPARSER_H

diff  --git a/llvm/include/llvm/MC/MCSymbolCOFF.h b/llvm/include/llvm/MC/MCSymbolCOFF.h
index 2964c521e8e44..c0ddef5cfee50 100644
--- a/llvm/include/llvm/MC/MCSymbolCOFF.h
+++ b/llvm/include/llvm/MC/MCSymbolCOFF.h
@@ -25,6 +25,7 @@ class MCSymbolCOFF : public MCSymbol {
     SF_ClassShift = 0,
 
     SF_SafeSEH = 0x0100,
+    SF_FarProc = 0x1000, // Kept clear of SF_WeakExternalCharacteristicsMask.
     SF_WeakExternalCharacteristicsMask = 0x0E00,
     SF_WeakExternalCharacteristicsShift = 9,
   };
@@ -66,6 +67,9 @@ class MCSymbolCOFF : public MCSymbol {
     modifyFlags(SF_SafeSEH, SF_SafeSEH);
   }
 
+  bool isFarProc() const { return (getFlags() & SF_FarProc) != 0; }
+  void setIsFarProc() const { modifyFlags(SF_FarProc, SF_FarProc); }
+
   static bool classof(const MCSymbol *S) { return S->isCOFF(); }
 };
 

diff  --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index 8464a2392680b..94f69402ad082 100644
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -12,7 +12,9 @@
 #include "llvm/MC/MCAsmMacro.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCParser/MCMasmParser.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCStreamer.h"
@@ -41,6 +43,7 @@ class COFFMasmParser : public MCAsmParserExtension {
                           StringRef COMDATSymName, COFF::COMDATType Type,
                           Align Alignment);
 
+  bool parseDirectiveModel(StringRef, SMLoc);
   bool parseDirectiveProc(StringRef, SMLoc);
   bool parseDirectiveEndProc(StringRef, SMLoc);
   bool parseDirectiveSegment(StringRef, SMLoc);
@@ -167,7 +170,7 @@ class COFFMasmParser : public MCAsmParserExtension {
     // .exit
     // .fardata
     // .fardata?
-    addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".model");
+    addDirectiveHandler<&COFFMasmParser::parseDirectiveModel>(".model");
     // .stack
     // .startup
 
@@ -201,8 +204,13 @@ class COFFMasmParser : public MCAsmParserExtension {
   }
 
   /// Stack of active procedure definitions.
-  SmallVector<StringRef, 1> CurrentProcedures;
-  SmallVector<bool, 1> CurrentProceduresFramed;
+  enum ProcDistance { PROC_DISTANCE_NEAR = 0, PROC_DISTANCE_FAR = 1 };
+  struct ProcInfo {
+    StringRef Name; // Procedure name parsed from the PROC statement.
+    ProcDistance Distance = PROC_DISTANCE_NEAR; // Defaults to NEAR.
+    bool IsFramed = false; // Set when the FRAME attribute is present.
+  };
+  SmallVector<ProcInfo, 1> CurrentProcedures; // Innermost procedure is back().
 
 public:
   COFFMasmParser() = default;
@@ -435,48 +443,75 @@ bool COFFMasmParser::parseDirectiveOption(StringRef Directive, SMLoc Loc) {
   return false;
 }
 
+/// parseDirectiveModel
+///  ::= ".model" "flat"
+/// Only the flat memory model is accepted; anything else is diagnosed.
+bool COFFMasmParser::parseDirectiveModel(StringRef Directive, SMLoc Loc) {
+  if (!getLexer().is(AsmToken::Identifier))
+    return TokError("expected identifier in directive");
+
+  const bool IsFlat = getTok().getIdentifier().equals_insensitive("flat");
+  if (!IsFlat)
+    return TokError(
+        "expected 'flat' for memory model; no other models supported");
+
+  // The flat model needs no parser state, so just consume the token.
+  Lex();
+  return false;
+}
+
 /// parseDirectiveProc
 /// TODO(epastor): Implement parameters and other attributes.
-///  ::= label "proc" [[distance]]
+///  ::= label "proc" [[distance]] [[frame]]
 ///          statements
 ///      label "endproc"
 bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) {
   if (!getStreamer().getCurrentFragment())
     return Error(getTok().getLoc(), "expected section directive");
 
-  StringRef Label;
-  if (getParser().parseIdentifier(Label))
+  ProcInfo Proc;
+  if (getParser().parseIdentifier(Proc.Name))
     return Error(Loc, "expected identifier for procedure");
-  if (getLexer().is(AsmToken::Identifier)) {
+  while (getLexer().is(AsmToken::Identifier)) {
     StringRef nextVal = getTok().getString();
     SMLoc nextLoc = getTok().getLoc();
     if (nextVal.equals_insensitive("far")) {
-      // TODO(epastor): Handle far procedure definitions.
       Lex();
-      return Error(nextLoc, "far procedure definitions not yet supported");
+      Proc.Distance = PROC_DISTANCE_FAR;
+      nextVal = getTok().getString();
+      nextLoc = getTok().getLoc();
     } else if (nextVal.equals_insensitive("near")) {
       Lex();
+      Proc.Distance = PROC_DISTANCE_NEAR;
+      nextVal = getTok().getString();
+      nextLoc = getTok().getLoc();
+    } else if (nextVal.equals_insensitive("frame")) {
+      Lex();
+      Proc.IsFramed = true;
       nextVal = getTok().getString();
       nextLoc = getTok().getLoc();
+    } else {
+      break;
     }
   }
-  MCSymbolCOFF *Sym = cast<MCSymbolCOFF>(getContext().getOrCreateSymbol(Label));
+  MCSymbolCOFF *Sym =
+      cast<MCSymbolCOFF>(getContext().getOrCreateSymbol(Proc.Name));
 
   // Define symbol as simple external function
   Sym->setExternal(true);
   Sym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT);
+  if (Proc.Distance == PROC_DISTANCE_FAR)
+    Sym->setIsFarProc();
+
+  cast<MCMasmParser>(getParser())
+      .setDefaultRetIsFar(Proc.Distance == PROC_DISTANCE_FAR);
 
-  bool Framed = false;
-  if (getLexer().is(AsmToken::Identifier) &&
-      getTok().getString().equals_insensitive("frame")) {
-    Lex();
-    Framed = true;
+  if (Proc.IsFramed) {
     getStreamer().emitWinCFIStartProc(Sym, Loc);
   }
   getStreamer().emitLabel(Sym, Loc);
 
-  CurrentProcedures.push_back(Label);
-  CurrentProceduresFramed.push_back(Framed);
+  CurrentProcedures.push_back(std::move(Proc));
   return false;
 }
 bool COFFMasmParser::parseDirectiveEndProc(StringRef Directive, SMLoc Loc) {
@@ -487,15 +522,18 @@ bool COFFMasmParser::parseDirectiveEndProc(StringRef Directive, SMLoc Loc) {
 
   if (CurrentProcedures.empty())
     return Error(Loc, "endp outside of procedure block");
-  else if (!CurrentProcedures.back().equals_insensitive(Label))
+  else if (!CurrentProcedures.back().Name.equals_insensitive(Label))
     return Error(LabelLoc, "endp does not match current procedure '" +
-                               CurrentProcedures.back() + "'");
+                               CurrentProcedures.back().Name + "'");
 
-  if (CurrentProceduresFramed.back()) {
+  if (CurrentProcedures.back().IsFramed) {
     getStreamer().emitWinCFIEndProc(Loc);
   }
   CurrentProcedures.pop_back();
-  CurrentProceduresFramed.pop_back();
+  cast<MCMasmParser>(getParser())
+      .setDefaultRetIsFar(!CurrentProcedures.empty() &&
+                          CurrentProcedures.back().Distance ==
+                              PROC_DISTANCE_FAR);
   return false;
 }
 

diff  --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 51ac19c623732..2e3d3d3890add 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -36,6 +36,7 @@
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCParser/MCMasmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -65,6 +66,7 @@
 #include <memory>
 #include <optional>
 #include <sstream>
+#include <stdbool.h>
 #include <string>
 #include <tuple>
 #include <utility>
@@ -373,7 +375,7 @@ FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
 /// The concrete assembly parser instance.
 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
-class MasmParser : public MCAsmParser {
+class MasmParser : public MCMasmParser {
 private:
   SourceMgr::DiagHandlerTy SavedDiagHandler;
   void *SavedDiagContext;
@@ -448,6 +450,9 @@ class MasmParser : public MCAsmParser {
   /// Are we parsing ms-style inline assembly?
   bool ParsingMSInlineAsm = false;
 
+  /// Is the current default `ret` instruction far?
+  bool DefaultRetIsFar = false;
+
   // Current <...> expression depth.
   unsigned AngleBracketDepth = 0U;
 
@@ -473,6 +478,14 @@ class MasmParser : public MCAsmParser {
     DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
   }
 
+  /// @name MCMasmParser Interface
+  /// {
+  // Accessors for DefaultRetIsFar, the far/near default for a bare `ret`.
+  bool getDefaultRetIsFar() const override { return DefaultRetIsFar; }
+  void setDefaultRetIsFar(bool IsFar) override { DefaultRetIsFar = IsFar; }
+
+  /// }
+
   /// @name MCAsmParser Interface
   /// {
 
@@ -504,8 +517,6 @@ class MasmParser : public MCAsmParser {
   }
   bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
 
-  bool isParsingMasm() const override { return true; }
-
   bool defineMacro(StringRef Name, StringRef Value) override;
 
   bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;

diff  --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8221679f1969c..3c52997d08c7e 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -25,6 +25,7 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCMasmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -32,6 +33,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolCOFF.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -1200,6 +1202,10 @@ class X86AsmParser : public MCTargetAsmParser {
   void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
                          MCStreamer &Out, bool MatchingInlineAsm);
 
+  void MatchMASMFarCallToNear(SMLoc IDLoc, X86Operand &Op,
+                              OperandVector &Operands, MCStreamer &Out,
+                              bool MatchingInlineAsm);
+
   bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
                            bool MatchingInlineAsm);
 
@@ -2738,11 +2744,11 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
   if ((BaseReg || IndexReg || RegNo || DefaultBaseReg))
     Operands.push_back(X86Operand::CreateMem(
         getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
-        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
+        Size, DefaultBaseReg, /*SymName=*/SM.getSymName(), /*OpDecl=*/nullptr,
         /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
   else
     Operands.push_back(X86Operand::CreateMem(
-        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
+        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/SM.getSymName(),
         /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
         MaybeDirectBranchDest));
   return false;
@@ -3440,6 +3446,14 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
     }
   }
 
+  if (Parser.isParsingMasm() && !is64BitMode()) {
+    // MASM implicitly converts "ret" to "retf" in far procedures; this is
+    // reflected in the default return type tracked by the MCMasmParser.
+    if (PatchedName == "ret" &&
+        cast<MCMasmParser>(getParser()).getDefaultRetIsFar())
+      PatchedName = "retf";
+  }
+
   // Determine whether this is an instruction prefix.
   // FIXME:
   // Enhance prefixes integrity robustness. for example, following forms
@@ -4128,6 +4142,11 @@ bool X86AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   // First, handle aliases that expand to multiple instructions.
   MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                     Out, MatchingInlineAsm);
+  if (getParser().isParsingMasm() && !is64BitMode()) {
+    MatchMASMFarCallToNear(IDLoc, static_cast<X86Operand &>(*Operands[0]),
+                           Operands, Out, MatchingInlineAsm);
+  }
+
   unsigned Prefixes = getPrefixes(Operands);
 
   MCInst Inst;
@@ -4189,6 +4208,37 @@ void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
   }
 }
 
+void X86AsmParser::MatchMASMFarCallToNear(SMLoc IDLoc, X86Operand &Op,
+                                          OperandVector &Operands,
+                                          MCStreamer &Out,
+                                          bool MatchingInlineAsm) {
+  // FIXME: This should be replaced with a real .td file alias mechanism.
+  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
+  // call.
+  if (Op.getToken() != "call" || Operands.size() < 2)
+    return;
+  // This is a call instruction with a target operand to inspect.
+
+  X86Operand &Operand = static_cast<X86Operand &>(*Operands[1]);
+  MCSymbol *Sym = getContext().lookupSymbol(Operand.getSymName());
+  if (Sym == nullptr || !Sym->isInSection() || !Sym->isCOFF() ||
+      !cast<MCSymbolCOFF>(Sym)->isFarProc())
+    return;
+  // Sym is a far proc that has already been placed in a section.
+
+  if (Out.getCurrentSectionOnly() == &Sym->getSection()) {
+    // This is a call to a symbol declared as a far proc, and will be emitted as
+    // a near call... so we need to explicitly push the code section register
+    // before the call.
+    MCInst Inst;
+    Inst.setOpcode(X86::PUSH32r);
+    Inst.addOperand(MCOperand::createReg(MCRegister(X86::CS)));
+    Inst.setLoc(IDLoc);
+    if (!MatchingInlineAsm)
+      emitInstruction(Inst, Operands, Out);
+  }
+}
+
 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
                                        const FeatureBitset &MissingFeatures,
                                        bool MatchingInlineAsm) {

diff  --git a/llvm/test/tools/llvm-ml/proc_distance.asm b/llvm/test/tools/llvm-ml/proc_distance.asm
new file mode 100644
index 0000000000000..71db903640b42
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/proc_distance.asm
@@ -0,0 +1,56 @@
+; RUN: llvm-ml -m32 -filetype=s %s /Fo - | FileCheck %s
+; Covers PROC NEAR/FAR: ret vs. retf, and push cs before same-section FAR calls.
+.code
+
+DefaultProc PROC
+  ret
+DefaultProc ENDP
+; CHECK: DefaultProc:
+; CHECK: {{^ *}}ret{{ *$}}
+
+t1:
+call DefaultProc
+; CHECK: t1:
+; CHECK-NEXT: call DefaultProc
+
+NearProc PROC NEAR
+  ret
+NearProc ENDP
+; CHECK: NearProc:
+; CHECK: {{^ *}}ret{{ *$}}
+
+t2:
+call NearProc
+; CHECK: t2:
+; CHECK-NEXT: call NearProc
+
+FarProcInCode PROC FAR
+  ret
+FarProcInCode ENDP
+; CHECK: FarProcInCode:
+; CHECK: {{^ *}}retf{{ *$}}
+
+t3:
+call FarProcInCode
+; CHECK: t3:
+; CHECK-NEXT: push cs
+; CHECK-NEXT: call FarProcInCode
+
+FarCode SEGMENT SHARED NOPAGE NOCACHE INFO READ WRITE EXECUTE DISCARD
+
+FarProcInFarCode PROC FAR
+  ret
+FarProcInFarCode ENDP
+; CHECK: FarProcInFarCode:
+; CHECK: {{^ *}}retf{{ *$}}
+
+FarCode ENDS
+
+.code
+
+t4:
+call FarProcInFarCode
+; CHECK: t4:
+; CHECK-NEXT: call FarProcInFarCode
+
+END


        


More information about the llvm-commits mailing list