[llvm] [aarch64][win] Add support for import call optimization (equivalent to MSVC /d2ImportCallOptimization) (PR #121516)

Daniel Paoliello via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 3 14:52:13 PST 2025


https://github.com/dpaoliello updated https://github.com/llvm/llvm-project/pull/121516

>From 29d50a116cd9a3547e2ab5b9707a0a685c33097d Mon Sep 17 00:00:00 2001
From: Daniel Paoliello <danpao at microsoft.com>
Date: Thu, 19 Dec 2024 14:11:48 -0800
Subject: [PATCH] [aarch64][win] Add support for import call optimization
 (equivalent to MSVC /d2ImportCallOptimization)

---
 llvm/include/llvm/CodeGen/MachineFunction.h   | 18 +++++
 llvm/include/llvm/CodeGen/SelectionDAG.h      | 14 ++++
 llvm/include/llvm/MC/MCObjectFileInfo.h       |  5 ++
 llvm/include/llvm/MC/MCStreamer.h             |  3 +
 llvm/include/llvm/MC/MCWinCOFFObjectWriter.h  |  2 +
 llvm/include/llvm/MC/MCWinCOFFStreamer.h      |  1 +
 .../SelectionDAG/ScheduleDAGSDNodes.cpp       |  3 +
 llvm/lib/MC/MCAsmStreamer.cpp                 |  9 +++
 llvm/lib/MC/MCObjectFileInfo.cpp              |  5 ++
 llvm/lib/MC/MCParser/COFFAsmParser.cpp        | 21 +++++
 llvm/lib/MC/MCStreamer.cpp                    |  2 +
 llvm/lib/MC/MCWinCOFFStreamer.cpp             |  9 +++
 llvm/lib/MC/WinCOFFObjectWriter.cpp           | 78 +++++++++++++++++++
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 42 ++++++++++
 .../Target/AArch64/AArch64ISelLowering.cpp    | 12 ++-
 .../win-import-call-optimization-nocalls.ll   | 18 +++++
 .../AArch64/win-import-call-optimization.ll   | 36 +++++++++
 .../win-import-call-optimization-no-section.s |  9 +++
 .../MC/AArch64/win-import-call-optimization.s | 62 +++++++++++++++
 ...n-import-call-optimization-not-supported.s | 13 ++++
 ...in-import-call-optimization-syntax-error.s | 14 ++++
 21 files changed, 372 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/win-import-call-optimization-nocalls.ll
 create mode 100644 llvm/test/CodeGen/AArch64/win-import-call-optimization.ll
 create mode 100644 llvm/test/MC/AArch64/win-import-call-optimization-no-section.s
 create mode 100644 llvm/test/MC/AArch64/win-import-call-optimization.s
 create mode 100644 llvm/test/MC/COFF/win-import-call-optimization-not-supported.s
 create mode 100644 llvm/test/MC/COFF/win-import-call-optimization-syntax-error.s

diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index d696add8a1af53..520f1745de2979 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -354,6 +354,11 @@ class LLVM_ABI MachineFunction {
   /// a table of valid targets for Windows EHCont Guard.
   std::vector<MCSymbol *> CatchretTargets;
 
+  /// Mapping of call instruction to the global value and target flags that it
+  /// calls, if applicable.
+  DenseMap<const MachineInstr *, std::pair<const GlobalValue *, unsigned>>
+      CalledGlobalsMap;
+
   /// \name Exception Handling
   /// \{
 
@@ -1182,6 +1187,19 @@ class LLVM_ABI MachineFunction {
     CatchretTargets.push_back(Target);
   }
 
+  /// Returns the global and target flags recorded for the call site \p MI, or
+  /// {nullptr, 0} if no called global was recorded for it.
+  std::pair<const GlobalValue *, unsigned>
+  tryGetCalledGlobal(const MachineInstr *MI) const {
+    return CalledGlobalsMap.lookup(MI);
+  }
+
+  /// Notes the global and target flags for a call site.
+  void addCalledGlobal(const MachineInstr *MI,
+                       std::pair<const GlobalValue *, unsigned> Details) {
+    CalledGlobalsMap.insert({MI, Details});
+  }
+
   /// \name Exception Handling
   /// \{
 
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index ff7caec41855fd..b31ad11c3ee0ee 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -293,6 +293,7 @@ class SelectionDAG {
     MDNode *HeapAllocSite = nullptr;
     MDNode *PCSections = nullptr;
     MDNode *MMRA = nullptr;
+    std::pair<const GlobalValue *, unsigned> CalledGlobal{};
     bool NoMerge = false;
   };
   /// Out-of-line extra information for SDNodes.
@@ -2373,6 +2374,19 @@ class SelectionDAG {
     auto It = SDEI.find(Node);
     return It != SDEI.end() ? It->second.MMRA : nullptr;
   }
+  /// Set CalledGlobal to be associated with Node.
+  void addCalledGlobal(const SDNode *Node, const GlobalValue *GV,
+                       unsigned OpFlags) {
+    SDEI[Node].CalledGlobal = {GV, OpFlags};
+  }
+  /// Return CalledGlobal for Node, or nullopt if Node has no extra info.
+  std::optional<std::pair<const GlobalValue *, unsigned>>
+  getCalledGlobal(const SDNode *Node) {
+    auto I = SDEI.find(Node);
+    return I != SDEI.end()
+               ? std::make_optional(std::move(I->second).CalledGlobal)
+               : std::nullopt;
+  }
   /// Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
   void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge) {
     if (NoMerge)
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index e2a2c84e47910b..fb575fe721015c 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -73,6 +73,10 @@ class MCObjectFileInfo {
   /// to emit them into.
   MCSection *CompactUnwindSection = nullptr;
 
+  /// If import call optimization is supported by the target, this is the
+  /// section to emit import call data to.
+  MCSection *ImportCallSection = nullptr;
+
   // Dwarf sections for debug info.  If a target supports debug info, these must
   // be set.
   MCSection *DwarfAbbrevSection = nullptr;
@@ -269,6 +273,7 @@ class MCObjectFileInfo {
   MCSection *getBSSSection() const { return BSSSection; }
   MCSection *getReadOnlySection() const { return ReadOnlySection; }
   MCSection *getLSDASection() const { return LSDASection; }
+  MCSection *getImportCallSection() const { return ImportCallSection; }
   MCSection *getCompactUnwindSection() const { return CompactUnwindSection; }
   MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
   MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 21da4dac4872b4..c82ce4428ed09c 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -569,6 +569,9 @@ class MCStreamer {
   /// \param Symbol - Symbol the image relative relocation should point to.
   virtual void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset);
 
+  /// Emits an import call directive, used to build the import call table.
+  virtual void emitCOFFImpCall(MCSymbol const *Symbol);
+
   /// Emits an lcomm directive with XCOFF csect information.
   ///
   /// \param LabelSym - Label on the block of storage.
diff --git a/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h b/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
index a4ede61e45099d..00a132706879f2 100644
--- a/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -72,6 +72,8 @@ class WinCOFFObjectWriter final : public MCObjectWriter {
                         const MCFixup &Fixup, MCValue Target,
                         uint64_t &FixedValue) override;
   uint64_t writeObject(MCAssembler &Asm) override;
+  void recordImportCall(const MCDataFragment &FB, const MCSymbol *Symbol);
+  bool hasRecordedImportCalls() const;
 };
 
 /// Construct a new Win COFF writer instance.
diff --git a/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
index 5c39d80538944b..2318d1b8e0a223 100644
--- a/llvm/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
@@ -58,6 +58,7 @@ class MCWinCOFFStreamer : public MCObjectStreamer {
   void emitCOFFSectionIndex(MCSymbol const *Symbol) override;
   void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
   void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
+  void emitCOFFImpCall(MCSymbol const *Symbol) override;
   void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                         Align ByteAlignment) override;
   void emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 26fc75c0578ec2..6744e7cd2ecfcf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -908,6 +908,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
         It->setMMRAMetadata(MF, MMRA);
     }
 
+    if (auto CalledGlobal = DAG->getCalledGlobal(Node))
+      MF.addCalledGlobal(MI, *CalledGlobal);
+
     return MI;
   };
 
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 01fe11ed205017..e4161c9478468d 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -209,6 +209,7 @@ class MCAsmStreamer final : public MCStreamer {
   void emitCOFFSectionIndex(MCSymbol const *Symbol) override;
   void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
   void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
+  void emitCOFFImpCall(MCSymbol const *Symbol) override;
   void emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size,
                                   MCSymbol *CsectSym, Align Alignment) override;
   void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol,
@@ -893,6 +894,14 @@ void MCAsmStreamer::emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {
   EmitEOL();
 }
 
+void MCAsmStreamer::emitCOFFImpCall(MCSymbol const *Symbol) {
+  assert(this->getContext().getObjectFileInfo()->getImportCallSection() &&
+         "This target does not have a import call section");
+  OS << "\t.impcall\t";
+  Symbol->print(OS, MAI);
+  EmitEOL();
+}
+
 // We need an XCOFF-specific version of this directive as the AIX syntax
 // requires a QualName argument identifying the csect name and storage mapping
 // class to appear before the alignment if we are specifying it.
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index f37e138edc36b1..150e38a94db6a6 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -596,6 +596,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
                                           COFF::IMAGE_SCN_MEM_READ);
   }
 
+  if (T.getArch() == Triple::aarch64) {
+    ImportCallSection =
+        Ctx->getCOFFSection(".impcall", COFF::IMAGE_SCN_LNK_INFO);
+  }
+
   // Debug info.
   COFFDebugSymbolsSection =
       Ctx->getCOFFSection(".debug$S", (COFF::IMAGE_SCN_MEM_DISCARDABLE |
diff --git a/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index 4d95a720852835..3da293b2312367 100644
--- a/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -12,6 +12,7 @@
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
 #include "llvm/MC/MCSectionCOFF.h"
@@ -70,6 +71,7 @@ class COFFAsmParser : public MCAsmParserExtension {
     addDirectiveHandler<&COFFAsmParser::parseDirectiveSymbolAttribute>(
         ".weak_anti_dep");
     addDirectiveHandler<&COFFAsmParser::parseDirectiveCGProfile>(".cg_profile");
+    addDirectiveHandler<&COFFAsmParser::parseDirectiveImpCall>(".impcall");
 
     // Win64 EH directives.
     addDirectiveHandler<&COFFAsmParser::parseSEHDirectiveStartProc>(
@@ -126,6 +128,7 @@ class COFFAsmParser : public MCAsmParserExtension {
   bool parseDirectiveLinkOnce(StringRef, SMLoc);
   bool parseDirectiveRVA(StringRef, SMLoc);
   bool parseDirectiveCGProfile(StringRef, SMLoc);
+  bool parseDirectiveImpCall(StringRef, SMLoc);
 
   // Win64 EH directives.
   bool parseSEHDirectiveStartProc(StringRef, SMLoc);
@@ -577,6 +580,24 @@ bool COFFAsmParser::parseDirectiveSymIdx(StringRef, SMLoc) {
   return false;
 }
 
+bool COFFAsmParser::parseDirectiveImpCall(StringRef, SMLoc) {
+  if (!getContext().getObjectFileInfo()->getImportCallSection())
+    return TokError("target does not have an import call section");
+
+  StringRef SymbolID;
+  if (getParser().parseIdentifier(SymbolID))
+    return TokError("expected identifier in directive");
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
+
+  Lex();
+  getStreamer().emitCOFFImpCall(Symbol);
+  return false;
+}
+
 /// ::= [ identifier ]
 bool COFFAsmParser::parseCOMDATType(COFF::COMDATType &Type) {
   StringRef TypeId = getTok().getIdentifier();
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index ccf65df150e786..ee26fc07313f18 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -1023,6 +1023,8 @@ void MCStreamer::emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {}
 
 void MCStreamer::emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {}
 
+void MCStreamer::emitCOFFImpCall(MCSymbol const *Symbol) {}
+
 /// EmitRawText - If this file is backed by an assembly streamer, this dumps
 /// the specified string in the output .s file.  This capability is
 /// indicated by the hasRawTextSupport() predicate.
diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp
index 395d4db3103d78..073af9527523a9 100644
--- a/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -280,6 +280,15 @@ void MCWinCOFFStreamer::emitCOFFImgRel32(const MCSymbol *Symbol,
   DF->appendContents(4, 0);
 }
 
+void MCWinCOFFStreamer::emitCOFFImpCall(MCSymbol const *Symbol) {
+  assert(this->getContext().getObjectFileInfo()->getImportCallSection() &&
+         "This target does not have a import call section");
+
+  auto *DF = getOrCreateDataFragment();
+  getAssembler().registerSymbol(*Symbol);
+  getWriter().recordImportCall(*DF, Symbol);
+}
+
 void MCWinCOFFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
                                          Align ByteAlignment) {
   auto *Symbol = cast<MCSymbolCOFF>(S);
diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 09d2b08e43050f..527464fa54ce02 100644
--- a/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -23,6 +23,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCFragment.h"
+#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionCOFF.h"
@@ -147,6 +148,13 @@ class llvm::WinCOFFWriter {
   bool UseBigObj;
   bool UseOffsetLabels = false;
 
+  struct ImportCall {
+    unsigned CallsiteOffset;
+    const MCSymbol *CalledSymbol;
+  };
+  using importcall_map = MapVector<MCSection *, std::vector<ImportCall>>;
+  importcall_map SectionToImportCallsMap;
+
 public:
   enum DwoMode {
     AllSections,
@@ -163,6 +171,11 @@ class llvm::WinCOFFWriter {
                         const MCFixup &Fixup, MCValue Target,
                         uint64_t &FixedValue);
   uint64_t writeObject(MCAssembler &Asm);
+  void generateAArch64ImportCallSection(llvm::MCAssembler &Asm);
+  void recordImportCall(const MCDataFragment &FB, const MCSymbol *Symbol);
+  bool hasRecordedImportCalls() const {
+    return !SectionToImportCallsMap.empty();
+  }
 
 private:
   COFFSymbol *createSymbol(StringRef Name);
@@ -1097,6 +1110,17 @@ uint64_t WinCOFFWriter::writeObject(MCAssembler &Asm) {
     }
   }
 
+  // Create the contents of the import call section.
+  if (hasRecordedImportCalls()) {
+    switch (Asm.getContext().getTargetTriple().getArch()) {
+    case Triple::aarch64:
+      generateAArch64ImportCallSection(Asm);
+      break;
+    default:
+      llvm_unreachable("unsupported architecture for import call section");
+    }
+  }
+
   assignFileOffsets(Asm);
 
   // MS LINK expects to be able to use this timestamp to implement their
@@ -1143,6 +1167,51 @@ uint64_t WinCOFFWriter::writeObject(MCAssembler &Asm) {
   return W.OS.tell() - StartOffset;
 }
 
+void llvm::WinCOFFWriter::generateAArch64ImportCallSection(
+    llvm::MCAssembler &Asm) {
+  auto *ImpCallSection =
+      Asm.getContext().getObjectFileInfo()->getImportCallSection();
+
+  if (!SectionMap.contains(ImpCallSection)) {
+    Asm.getContext().reportError(SMLoc(),
+                                 ".impcall directives were used, but no "
+                                 "existing .impcall section exists");
+    return;
+  }
+
+  auto *Frag = cast<MCDataFragment>(ImpCallSection->curFragList()->Head);
+  raw_svector_ostream OS(Frag->getContents());
+
+  // Layout of this section is:
+  // Per section that contains calls to imported functions:
+  //  uint32_t SectionSize: Size in bytes for information in this section.
+  //  uint32_t Section Number
+  //  Per call to imported function in section:
+  //    uint32_t Kind: the kind of imported function.
+  //    uint32_t BranchOffset: the offset of the branch instruction in its
+  //                            parent section.
+  //    uint32_t TargetSymbolId: the symbol id of the called function.
+
+  // Per section that contained eligible targets...
+  for (auto &[Section, Targets] : SectionToImportCallsMap) {
+    unsigned SectionSize = sizeof(uint32_t) * (2 + 3 * Targets.size());
+    support::endian::write(OS, SectionSize, W.Endian);
+    support::endian::write(OS, SectionMap.at(Section)->Number, W.Endian);
+    for (auto &[BranchOffset, TargetSymbol] : Targets) {
+      // Kind is always IMAGE_REL_ARM64_DYNAMIC_IMPORT_CALL (0x13).
+      support::endian::write(OS, 0x13, W.Endian);
+      support::endian::write(OS, BranchOffset, W.Endian);
+      support::endian::write(OS, TargetSymbol->getIndex(), W.Endian);
+    }
+  }
+}
+
+void WinCOFFWriter::recordImportCall(const MCDataFragment &FB,
+                                     const MCSymbol *Symbol) {
+  auto &SectionData = SectionToImportCallsMap[FB.getParent()];
+  SectionData.push_back(ImportCall{unsigned(FB.getContents().size()), Symbol});
+}
+
 //------------------------------------------------------------------------------
 // WinCOFFObjectWriter class implementation
 
@@ -1194,6 +1263,15 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm) {
   return TotalSize;
 }
 
+void WinCOFFObjectWriter::recordImportCall(const MCDataFragment &FB,
+                                           const MCSymbol *Symbol) {
+  ObjWriter->recordImportCall(FB, Symbol);
+}
+
+bool WinCOFFObjectWriter::hasRecordedImportCalls() const {
+  return ObjWriter->hasRecordedImportCalls();
+}
+
 MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_)
     : Machine(Machine_) {}
 
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 9bec782ca8ce97..4c03f876465051 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -77,6 +77,11 @@ static cl::opt<PtrauthCheckMode> PtrauthAuthChecks(
     cl::desc("Check pointer authentication auth/resign failures"),
     cl::init(Default));
 
+static cl::opt<bool> EnableImportCallOptimization(
+    "aarch64-win-import-call-optimization", cl::Hidden,
+    cl::desc("Enable import call optimization for AArch64 Windows"),
+    cl::init(false));
+
 #define DEBUG_TYPE "asm-printer"
 
 namespace {
@@ -293,6 +298,11 @@ class AArch64AsmPrinter : public AsmPrinter {
                               MCSymbol *LazyPointer) override;
   void emitMachOIFuncStubHelperBody(Module &M, const GlobalIFunc &GI,
                                     MCSymbol *LazyPointer) override;
+
+  /// Checks if this instruction is part of a sequence that is eligible for
+  /// import call optimization and, if so, records it to be emitted in the
+  /// import call section.
+  void recordIfImportCall(const MachineInstr *BranchInst);
 };
 
 } // end anonymous namespace
@@ -921,6 +931,15 @@ void AArch64AsmPrinter::emitEndOfAsmFile(Module &M) {
   // Emit stack and fault map information.
   FM.serializeToFaultMapSection();
 
+  // If import call optimization is enabled, emit the appropriate section.
+  // We do this whether or not we recorded any import calls.
+  if (EnableImportCallOptimization && TT.isOSBinFormatCOFF()) {
+    OutStreamer->switchSection(getObjFileLowering().getImportCallSection());
+
+    // Section always starts with some magic.
+    constexpr char ImpCallMagic[12] = "Imp_Call_V1";
+    OutStreamer->emitBytes(StringRef(ImpCallMagic, sizeof(ImpCallMagic)));
+  }
 }
 
 void AArch64AsmPrinter::emitLOHs() {
@@ -2693,6 +2712,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
   case AArch64::TCRETURNrinotx16:
   case AArch64::TCRETURNriALL: {
     emitPtrauthTailCallHardening(MI);
+    recordIfImportCall(MI);
 
     MCInst TmpInst;
     TmpInst.setOpcode(AArch64::BR);
@@ -2702,6 +2722,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
   }
   case AArch64::TCRETURNdi: {
     emitPtrauthTailCallHardening(MI);
+    recordIfImportCall(MI);
 
     MCOperand Dest;
     MCInstLowering.lowerOperand(MI->getOperand(0), Dest);
@@ -3035,6 +3056,14 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
     TS->emitARM64WinCFISaveAnyRegQPX(MI->getOperand(0).getImm(),
                                      -MI->getOperand(2).getImm());
     return;
+
+  case AArch64::BLR:
+  case AArch64::BR:
+    recordIfImportCall(MI);
+    MCInst TmpInst;
+    MCInstLowering.Lower(MI, TmpInst);
+    EmitToStreamer(*OutStreamer, TmpInst);
+    return;
   }
 
   // Finally, do the automated lowerings for everything else.
@@ -3043,6 +3072,19 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, TmpInst);
 }
 
+void AArch64AsmPrinter::recordIfImportCall(
+    const llvm::MachineInstr *BranchInst) {
+  if (!EnableImportCallOptimization ||
+      !TM.getTargetTriple().isOSBinFormatCOFF())
+    return;
+
+  auto [GV, OpFlags] = BranchInst->getMF()->tryGetCalledGlobal(BranchInst);
+  if (GV && GV->hasDLLImportStorageClass()) {
+    OutStreamer->emitCOFFImpCall(
+        MCInstLowering.GetGlobalValueSymbol(GV, OpFlags));
+  }
+}
+
 void AArch64AsmPrinter::emitMachOIFuncStubBody(Module &M, const GlobalIFunc &GI,
                                                MCSymbol *LazyPointer) {
   // _ifunc:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 070163a5fb297c..e482f67d3fea00 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9386,12 +9386,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   // node so that legalize doesn't hack it.
+  const GlobalValue *CalledGlobal = nullptr;
+  unsigned OpFlags = 0;
   if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-    auto GV = G->getGlobal();
-    unsigned OpFlags =
-        Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
+    CalledGlobal = G->getGlobal();
+    OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
+                                                         getTargetMachine());
     if (OpFlags & AArch64II::MO_GOT) {
-      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
+      Callee = DAG.getTargetGlobalAddress(CalledGlobal, DL, PtrVT, 0, OpFlags);
       Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
     } else {
       const GlobalValue *GV = G->getGlobal();
@@ -9511,6 +9513,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
     DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
+    DAG.addCalledGlobal(Ret.getNode(), CalledGlobal, OpFlags);
     return Ret;
   }
 
@@ -9522,6 +9525,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
   InGlue = Chain.getValue(1);
   DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
+  DAG.addCalledGlobal(Chain.getNode(), CalledGlobal, OpFlags);
 
   uint64_t CalleePopBytes =
       DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
diff --git a/llvm/test/CodeGen/AArch64/win-import-call-optimization-nocalls.ll b/llvm/test/CodeGen/AArch64/win-import-call-optimization-nocalls.ll
new file mode 100644
index 00000000000000..e2e3ff2be7bebd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/win-import-call-optimization-nocalls.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=aarch64-pc-windows-msvc -aarch64-win-import-call-optimization < %s | FileCheck %s
+
+define dso_local void @normal_call() local_unnamed_addr {
+entry:
+  call void @a()
+  ret void
+}
+; CHECK-LABEL:  normal_call:
+; CHECK-NOT:    .impcall
+; CHECK:        bl a
+
+declare void @a() local_unnamed_addr
+
+; Even if there are no calls to imported functions, we still need to emit the
+; .impcall section.
+
+; CHECK-LABEL:  .section   .impcall,"yi"
+; CHECK-NEXT:   .asciz     "Imp_Call_V1"
diff --git a/llvm/test/CodeGen/AArch64/win-import-call-optimization.ll b/llvm/test/CodeGen/AArch64/win-import-call-optimization.ll
new file mode 100644
index 00000000000000..f35985fade5798
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/win-import-call-optimization.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=aarch64-pc-windows-msvc -aarch64-win-import-call-optimization < %s | FileCheck %s --check-prefix=CHECK-ENABLED
+; RUN: llc -mtriple=aarch64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK-DISABLED
+
+; CHECK-DISABLED-NOT: .impcall
+; CHECK-DISABLED-NOT: .section        .impcall
+
+define dso_local void @normal_call() local_unnamed_addr {
+entry:
+  call void @a()
+  call void @a()
+  ret void
+}
+; CHECK-ENABLED-LABEL:  normal_call:
+; CHECK-ENABLED:        adrp    [[ADRPREG:x[0-9]+]], __imp_a
+; CHECK-ENABLED-NEXT:   ldr     [[LDRREG:x[0-9]+]], [[[ADRPREG]], :lo12:__imp_a]
+; CHECK-ENABLED-NEXT:   .impcall        __imp_a
+; CHECK-ENABLED-NEXT:   blr     [[LDRREG]]
+; CHECK-ENABLED-NEXT:   .impcall        __imp_a
+; CHECK-ENABLED-NEXT:   blr     [[LDRREG]]
+
+define dso_local void @tail_call() local_unnamed_addr {
+entry:
+  tail call void @b()
+  ret void
+}
+; CHECK-ENABLED-LABEL:  tail_call:
+; CHECK-ENABLED:        adrp    [[ADRPREG:x[0-9]+]], __imp_b
+; CHECK-ENABLED-NEXT:   ldr     [[LDRREG:x[0-9]+]], [[[ADRPREG]], :lo12:__imp_b]
+; CHECK-ENABLED-NEXT:   .impcall        __imp_b
+; CHECK-ENABLED-NEXT:   br      [[LDRREG]]
+
+declare dllimport void @a() local_unnamed_addr
+declare dllimport void @b() local_unnamed_addr
+
+; CHECK-ENABLED-LABEL:  .section   .impcall,"yi"
+; CHECK-ENABLED-NEXT:   .asciz     "Imp_Call_V1"
diff --git a/llvm/test/MC/AArch64/win-import-call-optimization-no-section.s b/llvm/test/MC/AArch64/win-import-call-optimization-no-section.s
new file mode 100644
index 00000000000000..9aee4cf9f8d27a
--- /dev/null
+++ b/llvm/test/MC/AArch64/win-import-call-optimization-no-section.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -triple aarch64-windows-msvc -filetype obj < %s 2>&1 | FileCheck %s
+
+tail_call:
+  adrp    x8, __imp_b
+  ldr     x8, [x8, :lo12:__imp_b]
+  .impcall        __imp_b
+  br     x8
+
+// CHECK: error: .impcall directives were used, but no existing .impcall section exists
diff --git a/llvm/test/MC/AArch64/win-import-call-optimization.s b/llvm/test/MC/AArch64/win-import-call-optimization.s
new file mode 100644
index 00000000000000..b4656314f084ab
--- /dev/null
+++ b/llvm/test/MC/AArch64/win-import-call-optimization.s
@@ -0,0 +1,62 @@
+// RUN: llvm-mc -triple aarch64-windows-msvc -filetype obj -o %t.obj %s
+// RUN: llvm-readobj --sections --sd --relocs %t.obj | FileCheck %s
+
+.section        nc_sect,"xr"
+normal_call:
+  str     x30, [sp, #-16]!                // 8-byte Folded Spill
+  adrp    x8, __imp_a
+  ldr     x8, [x8, :lo12:__imp_a]
+  .impcall        __imp_a
+  blr     x8
+  ldr     x30, [sp], #16                  // 8-byte Folded Reload
+  ret
+
+.section        tc_sect,"xr"
+tail_call:
+  adrp    x8, __imp_b
+  ldr     x8, [x8, :lo12:__imp_b]
+  .impcall        __imp_b
+  br     x8
+
+.section        .impcall,"yi"
+.asciz  "Imp_Call_V1"
+
+// CHECK-LABEL: Name: .impcall (2E 69 6D 70 63 61 6C 6C)
+// CHECK-NEXT:  VirtualSize: 0x0
+// CHECK-NEXT:  VirtualAddress: 0x0
+// CHECK-NEXT:  RawDataSize: 52
+// CHECK-NEXT:  PointerToRawData: 0x150
+// CHECK-NEXT:  PointerToRelocations: 0x0
+// CHECK-NEXT:  PointerToLineNumbers: 0x0
+// CHECK-NEXT:  RelocationCount: 0
+// CHECK-NEXT:  LineNumberCount: 0
+// CHECK-NEXT:  Characteristics [ (0x100200)
+// CHECK-NEXT:    IMAGE_SCN_ALIGN_1BYTES (0x100000)
+// CHECK-NEXT:    IMAGE_SCN_LNK_INFO (0x200)
+// CHECK-NEXT:  ]
+// CHECK-NEXT:  SectionData (
+// CHECK-NEXT:    0000: 496D705F 43616C6C 5F563100 14000000  |Imp_Call_V1.....|
+// CHECK-NEXT:    0010:
+// CHECK-SAME:    [[#%.2X,NCSECT:]]000000
+// CHECK-SAME:    13000000
+// CHECK-SAME:    [[#%.2X,NCOFFSET:]]000000
+// CHECK-SAME:    [[#%.2X,NCSYM:]]000000
+// CHECK-NEXT:    0020:
+// CHECK-SAME:    14000000
+// CHECK-SAME:    [[#%.2X,TCSECT:]]000000
+// CHECK-SAME:    13000000
+// CHECK-SAME:    [[#%.2X,TCOFFSET:]]000000
+// CHECK-NEXT:    0030:
+// CHECK-SAME:    [[#%.2X,TCSYM:]]000000
+// CHECK-NEXT:  )
+
+// CHECK-LABEL: Relocations [
+// CHECK-NEXT:     Section ([[#%u,NCSECT]]) nc_sect {
+// CHECK-NEXT:       0x[[#%x,NCOFFSET - 8]] IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_a ([[#%u,NCSYM]])
+// CHECK-NEXT:       0x[[#%x,NCOFFSET - 4]] IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_a ([[#%u,NCSYM]])
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Section ([[#%u,TCSECT]]) tc_sect {
+// CHECK-NEXT:       0x[[#%x,TCOFFSET - 8]] IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_b ([[#%u,TCSYM]])
+// CHECK-NEXT:       0x[[#%x,TCOFFSET - 4]] IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_b ([[#%u,TCSYM]])
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ]
diff --git a/llvm/test/MC/COFF/win-import-call-optimization-not-supported.s b/llvm/test/MC/COFF/win-import-call-optimization-not-supported.s
new file mode 100644
index 00000000000000..5467af15ccc6eb
--- /dev/null
+++ b/llvm/test/MC/COFF/win-import-call-optimization-not-supported.s
@@ -0,0 +1,13 @@
+// RUN: not llvm-mc -triple thumbv7a-windows-msvc < %s 2>&1 | FileCheck %s
+
+tail_call:
+  movw    r0, :lower16:__imp_a
+  movt    r0, :upper16:__imp_a
+  ldr     r0, [r0]
+  pop.w   {r11, lr}
+// CHECK: error: target does not have an import call section
+  .impcall __imp_a
+  bx      r0
+
+.section        .impcall,"yi"
+.ascii  "Imp_Call_V1"
diff --git a/llvm/test/MC/COFF/win-import-call-optimization-syntax-error.s b/llvm/test/MC/COFF/win-import-call-optimization-syntax-error.s
new file mode 100644
index 00000000000000..6447843e968ab1
--- /dev/null
+++ b/llvm/test/MC/COFF/win-import-call-optimization-syntax-error.s
@@ -0,0 +1,14 @@
+// RUN: not llvm-mc -triple aarch64-windows-msvc -filetype obj < %s 2>&1 | FileCheck %s
+
+tail_call:
+  adrp    x8, __imp_b
+  ldr     x8, [x8, :lo12:__imp_b]
+// CHECK: error: expected identifier in directive
+  .impcall
+  br     x8
+// CHECK: error: unexpected token in directive
+  .impcall        __imp_b x8
+  br     x8
+
+.section        .impcall,"yi"
+.ascii  "Imp_Call_V1"



More information about the llvm-commits mailing list