[llvm] [ms] [llvm-ml] Implement support for PROC NEAR/FAR (PR #131707)

Eric Astor via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 17 18:56:44 PDT 2025


https://github.com/ericastor created https://github.com/llvm/llvm-project/pull/131707

Matches ML.EXE by translating "ret" instructions inside a `PROC FAR` to "retf", and automatically prepending a `push cs` to all near calls to a `PROC FAR`.

>From f789159627ecc3eb6f65c18e8290cc043d76dafc Mon Sep 17 00:00:00 2001
From: Eric Astor <epastor at google.com>
Date: Mon, 17 Mar 2025 21:48:58 +0000
Subject: [PATCH] [ms] [llvm-ml] Implement support for PROC NEAR/FAR

Matches ML.EXE by translating "ret" instructions inside a `PROC FAR` to "retf", and automatically prepending a `push cs` to all near calls to a `PROC FAR`.
---
 llvm/include/llvm/MC/MCContext.h              |  7 ++
 llvm/include/llvm/MC/MCSymbolCOFF.h           |  8 +++
 llvm/lib/MC/MCParser/COFFMasmParser.cpp       | 72 ++++++++++++++-----
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 54 +++++++++++++-
 llvm/test/tools/llvm-ml/proc_distance.asm     | 56 +++++++++++++++
 5 files changed, 178 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/tools/llvm-ml/proc_distance.asm

diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index e97c890ce9135..70b90834f1edc 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -97,9 +97,13 @@ class MCContext {
     IsDXContainer
   };
 
+  enum DefaultRetType { IsNear, IsFar };
+
 private:
   Environment Env;
 
+  DefaultRetType DefaultRet = IsNear;
+
   /// The name of the Segment where Swift5 Reflection Section data will be
   /// outputted
   StringRef Swift5ReflectionSegmentName;
@@ -394,6 +398,9 @@ class MCContext {
 
   Environment getObjectFileType() const { return Env; }
 
+  DefaultRetType getDefaultRetType() const { return DefaultRet; }
+  void setDefaultRetType(DefaultRetType DR) { DefaultRet = DR; }
+
   const StringRef &getSwift5ReflectionSegmentName() const {
     return Swift5ReflectionSegmentName;
   }
diff --git a/llvm/include/llvm/MC/MCSymbolCOFF.h b/llvm/include/llvm/MC/MCSymbolCOFF.h
index 2964c521e8e44..badcbbcd865c6 100644
--- a/llvm/include/llvm/MC/MCSymbolCOFF.h
+++ b/llvm/include/llvm/MC/MCSymbolCOFF.h
@@ -25,6 +25,7 @@ class MCSymbolCOFF : public MCSymbol {
     SF_ClassShift = 0,
 
     SF_SafeSEH = 0x0100,
+    SF_FarProc = 0x1000, // Kept clear of the weak-external bits (0x0E00).
     SF_WeakExternalCharacteristicsMask = 0x0E00,
     SF_WeakExternalCharacteristicsShift = 9,
   };
@@ -66,6 +67,13 @@ class MCSymbolCOFF : public MCSymbol {
     modifyFlags(SF_SafeSEH, SF_SafeSEH);
   }
 
+  /// Whether this symbol carries the SF_FarProc flag.
+  bool isFarProc() const { return (getFlags() & SF_FarProc) != 0; }
+
+  /// Sets SF_FarProc. Declared const, which relies on modifyFlags() being
+  /// callable on a const symbol.
+  void setIsFarProc() const { modifyFlags(SF_FarProc, SF_FarProc); }
+
   static bool classof(const MCSymbol *S) { return S->isCOFF(); }
 };
 
diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
index 8464a2392680b..4ed73e6d93be0 100644
--- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCContext.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/COFF.h"
@@ -41,6 +42,7 @@ class COFFMasmParser : public MCAsmParserExtension {
                           StringRef COMDATSymName, COFF::COMDATType Type,
                           Align Alignment);
 
+  bool parseDirectiveModel(StringRef, SMLoc);
   bool parseDirectiveProc(StringRef, SMLoc);
   bool parseDirectiveEndProc(StringRef, SMLoc);
   bool parseDirectiveSegment(StringRef, SMLoc);
@@ -167,7 +169,7 @@ class COFFMasmParser : public MCAsmParserExtension {
     // .exit
     // .fardata
     // .fardata?
-    addDirectiveHandler<&COFFMasmParser::IgnoreDirective>(".model");
+    addDirectiveHandler<&COFFMasmParser::parseDirectiveModel>(".model");
     // .stack
     // .startup
 
@@ -201,8 +203,13 @@ class COFFMasmParser : public MCAsmParserExtension {
   }
 
   /// Stack of active procedure definitions.
-  SmallVector<StringRef, 1> CurrentProcedures;
-  SmallVector<bool, 1> CurrentProceduresFramed;
+  enum ProcDistance { PROC_DISTANCE_NEAR = 0, PROC_DISTANCE_FAR = 1 };
+  struct ProcInfo {
+    StringRef Name; // Label given to PROC; matched against ENDP's label.
+    ProcDistance Distance = PROC_DISTANCE_NEAR; // FAR only if "far" is given.
+    bool IsFramed = false; // True if "frame" follows; gates the WinCFI end.
+  };
+  SmallVector<ProcInfo, 1> CurrentProcedures; // Innermost procedure is last.
 
 public:
   COFFMasmParser() = default;
@@ -435,48 +442,75 @@ bool COFFMasmParser::parseDirectiveOption(StringRef Directive, SMLoc Loc) {
   return false;
 }
 
+/// parseDirectiveModel
+///  ::= ".model" "flat" [[, model-options]]
+/// Only the flat model is accepted; any trailing options are skipped unparsed.
+bool COFFMasmParser::parseDirectiveModel(StringRef Directive, SMLoc Loc) {
+  if (!getLexer().is(AsmToken::Identifier))
+    return TokError("expected identifier in directive");
+  if (!getTok().getIdentifier().equals_insensitive("flat"))
+    return TokError(
+        "expected 'flat' for memory model; no other models supported");
+  // Nothing to record for the flat model. Drain the rest of the statement so
+  // options such as ", C" or ", stdcall" are not parsed as a new statement.
+  while (getLexer().isNot(AsmToken::EndOfStatement) &&
+         getLexer().isNot(AsmToken::Eof))
+    Lex();
+  return false;
+}
+
 /// parseDirectiveProc
 /// TODO(epastor): Implement parameters and other attributes.
-///  ::= label "proc" [[distance]]
+///  ::= label "proc" [[distance]] [[frame]]
 ///          statements
 ///      label "endproc"
 bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) {
   if (!getStreamer().getCurrentFragment())
     return Error(getTok().getLoc(), "expected section directive");
 
-  StringRef Label;
-  if (getParser().parseIdentifier(Label))
+  ProcInfo Proc;
+  if (getParser().parseIdentifier(Proc.Name))
     return Error(Loc, "expected identifier for procedure");
   if (getLexer().is(AsmToken::Identifier)) {
     StringRef nextVal = getTok().getString();
     SMLoc nextLoc = getTok().getLoc();
     if (nextVal.equals_insensitive("far")) {
-      // TODO(epastor): Handle far procedure definitions.
       Lex();
-      return Error(nextLoc, "far procedure definitions not yet supported");
+      Proc.Distance = PROC_DISTANCE_FAR;
+      nextVal = getTok().getString();
+      nextLoc = getTok().getLoc();
     } else if (nextVal.equals_insensitive("near")) {
       Lex();
+      Proc.Distance = PROC_DISTANCE_NEAR;
       nextVal = getTok().getString();
       nextLoc = getTok().getLoc();
     }
   }
-  MCSymbolCOFF *Sym = cast<MCSymbolCOFF>(getContext().getOrCreateSymbol(Label));
+  MCSymbolCOFF *Sym =
+      cast<MCSymbolCOFF>(getContext().getOrCreateSymbol(Proc.Name));
 
   // Define symbol as simple external function
   Sym->setExternal(true);
   Sym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT);
+  // Record the distance on the symbol (for callers) and in the context (so
+  // "ret" inside this procedure is assembled as near or far accordingly).
+  const bool IsFar = Proc.Distance == PROC_DISTANCE_FAR;
+  if (IsFar)
+    Sym->setIsFarProc();
+  getContext().setDefaultRetType(IsFar ? MCContext::IsFar : MCContext::IsNear);
 
-  bool Framed = false;
   if (getLexer().is(AsmToken::Identifier) &&
       getTok().getString().equals_insensitive("frame")) {
     Lex();
-    Framed = true;
+    Proc.IsFramed = true;
+  }
+  // Open the unwind frame exactly once; ENDP closes it when IsFramed is set.
+  if (Proc.IsFramed) {
     getStreamer().emitWinCFIStartProc(Sym, Loc);
   }
   getStreamer().emitLabel(Sym, Loc);
 
-  CurrentProcedures.push_back(Label);
-  CurrentProceduresFramed.push_back(Framed);
+  CurrentProcedures.push_back(std::move(Proc));
   return false;
 }
 bool COFFMasmParser::parseDirectiveEndProc(StringRef Directive, SMLoc Loc) {
@@ -487,15 +521,19 @@ bool COFFMasmParser::parseDirectiveEndProc(StringRef Directive, SMLoc Loc) {
 
   if (CurrentProcedures.empty())
     return Error(Loc, "endp outside of procedure block");
-  else if (!CurrentProcedures.back().equals_insensitive(Label))
+  else if (!CurrentProcedures.back().Name.equals_insensitive(Label))
     return Error(LabelLoc, "endp does not match current procedure '" +
-                               CurrentProcedures.back() + "'");
+                               CurrentProcedures.back().Name + "'");
 
-  if (CurrentProceduresFramed.back()) {
+  if (CurrentProcedures.back().IsFramed) {
     getStreamer().emitWinCFIEndProc(Loc);
   }
   CurrentProcedures.pop_back();
-  CurrentProceduresFramed.pop_back();
+  getContext().setDefaultRetType(
+      (CurrentProcedures.empty() ||
+       CurrentProcedures.back().Distance == PROC_DISTANCE_NEAR)
+          ? MCContext::IsNear
+          : MCContext::IsFar);
   return false;
 }
 
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a6285a55f4155..fed7faafd9460 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -14,6 +14,7 @@
 #include "MCTargetDesc/X86TargetStreamer.h"
 #include "TargetInfo/X86TargetInfo.h"
 #include "X86Operand.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -32,6 +33,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolCOFF.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -1202,6 +1204,10 @@ class X86AsmParser : public MCTargetAsmParser {
   void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
                          MCStreamer &Out, bool MatchingInlineAsm);
 
+  void MatchMASMFarCallToNear(SMLoc IDLoc, X86Operand &Op,
+                              OperandVector &Operands, MCStreamer &Out,
+                              bool MatchingInlineAsm);
+
   bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
                            bool MatchingInlineAsm);
 
@@ -2740,11 +2746,11 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
   if ((BaseReg || IndexReg || RegNo || DefaultBaseReg))
     Operands.push_back(X86Operand::CreateMem(
         getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
-        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
+        Size, DefaultBaseReg, /*SymName=*/SM.getSymName(), /*OpDecl=*/nullptr,
         /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
   else
     Operands.push_back(X86Operand::CreateMem(
-        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
+        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/SM.getSymName(),
         /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
         MaybeDirectBranchDest));
   return false;
@@ -3442,6 +3448,14 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
     }
   }
 
+  if (Parser.isParsingMasm() && !is64BitMode()) {
+    // MASM implicitly converts "ret" to "retf" in far procedures; this is
+    // reflected in the default return type in the MCContext.
+    if (PatchedName == "ret" &&
+        getContext().getDefaultRetType() == MCContext::IsFar)
+      PatchedName = "retf";
+  }
+
   // Determine whether this is an instruction prefix.
   // FIXME:
   // Enhance prefixes integrity robustness. for example, following forms
@@ -4130,6 +4144,11 @@ bool X86AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   // First, handle aliases that expand to multiple instructions.
   MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                     Out, MatchingInlineAsm);
+  if (getParser().isParsingMasm() && !is64BitMode()) {
+    MatchMASMFarCallToNear(IDLoc, static_cast<X86Operand &>(*Operands[0]),
+                           Operands, Out, MatchingInlineAsm);
+  }
+
   unsigned Prefixes = getPrefixes(Operands);
 
   MCInst Inst;
@@ -4191,6 +4210,37 @@ void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
   }
 }
 
+/// MASM: when a "call" targets a symbol defined as PROC FAR in the current
+/// section, emit "push cs" first, since the call itself is emitted as near.
+void X86AsmParser::MatchMASMFarCallToNear(SMLoc IDLoc, X86Operand &Op,
+                                          OperandVector &Operands,
+                                          MCStreamer &Out,
+                                          bool MatchingInlineAsm) {
+  // FIXME: This should be replaced with a real .td file alias mechanism.
+  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
+  // call.
+  // Nothing to inspect unless this is a "call" that carries an operand.
+  if (Op.getToken() != "call" || Operands.size() < 2)
+    return;
+
+  X86Operand &Target = static_cast<X86Operand &>(*Operands[1]);
+  MCSymbol *Sym = getContext().lookupSymbol(Target.getSymName());
+  if (!Sym || !Sym->isInSection() || !Sym->isCOFF() ||
+      !cast<MCSymbolCOFF>(Sym)->isFarProc())
+    return;
+  // Calls into a different section are left untouched.
+  if (Out.getCurrentSectionOnly() != &Sym->getSection())
+    return;
+
+  // The call is emitted as a near call, so push CS explicitly. Use the
+  // dedicated segment-push opcode: PUSH32r takes a general-purpose register.
+  MCInst Inst;
+  Inst.setOpcode(is16BitMode() ? X86::PUSHCS16 : X86::PUSHCS32);
+  Inst.setLoc(IDLoc);
+  if (!MatchingInlineAsm)
+    emitInstruction(Inst, Operands, Out);
+}
+
 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
                                        const FeatureBitset &MissingFeatures,
                                        bool MatchingInlineAsm) {
diff --git a/llvm/test/tools/llvm-ml/proc_distance.asm b/llvm/test/tools/llvm-ml/proc_distance.asm
new file mode 100644
index 0000000000000..71db903640b42
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/proc_distance.asm
@@ -0,0 +1,56 @@
+; RUN: llvm-ml -m32 -filetype=s %s /Fo - | FileCheck %s
+
+.code
+
+DefaultProc PROC
+  ret
+DefaultProc ENDP
+; CHECK: DefaultProc:
+; CHECK: {{^ *}}ret{{ *$}}
+
+t1:
+call DefaultProc
+; CHECK: t1:
+; CHECK-NEXT: call DefaultProc
+
+NearProc PROC NEAR
+  ret
+NearProc ENDP
+; CHECK: NearProc:
+; CHECK: {{^ *}}ret{{ *$}}
+
+t2:
+call NearProc
+; CHECK: t2:
+; CHECK-NEXT: call NearProc
+
+FarProcInCode PROC FAR
+  ret
+FarProcInCode ENDP
+; CHECK: FarProcInCode:
+; CHECK: {{^ *}}retf{{ *$}}
+
+t3:
+call FarProcInCode
+; CHECK: t3:
+; CHECK-NEXT: push cs
+; CHECK-NEXT: call FarProcInCode
+
+FarCode SEGMENT SHARED NOPAGE NOCACHE INFO READ WRITE EXECUTE DISCARD
+
+FarProcInFarCode PROC FAR
+  ret
+FarProcInFarCode ENDP
+; CHECK: FarProcInFarCode:
+; CHECK: {{^ *}}retf{{ *$}}
+
+FarCode ENDS
+
+.code
+
+t4:
+call FarProcInFarCode
+; CHECK: t4:
+; CHECK-NEXT: call FarProcInFarCode
+
+END



More information about the llvm-commits mailing list