[llvm] [aarch64][win] Add support for import call optimization (equivalent to MSVC /d2ImportCallOptimization) (PR #121516)

Daniel Paoliello via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 10 15:26:04 PST 2025


https://github.com/dpaoliello updated https://github.com/llvm/llvm-project/pull/121516

>From f2ef7b0b530ac1664a3f4c5c03f3db135ac65174 Mon Sep 17 00:00:00 2001
From: Daniel Paoliello <danpao at microsoft.com>
Date: Thu, 19 Dec 2024 14:11:48 -0800
Subject: [PATCH] [aarch64][win] Add support for import call optimization
 (equivalent to MSVC /d2ImportCallOptimization)

---
 llvm/include/llvm/CodeGen/MIRYamlMapping.h    |  45 +++++--
 llvm/include/llvm/CodeGen/MachineFunction.h   |  25 ++++
 llvm/include/llvm/CodeGen/SelectionDAG.h      |  14 +++
 llvm/include/llvm/MC/MCObjectFileInfo.h       |   5 +
 llvm/include/llvm/MC/MCStreamer.h             |   8 ++
 llvm/include/llvm/MC/MCWinCOFFObjectWriter.h  |   1 +
 llvm/include/llvm/MC/MCWinCOFFStreamer.h      |   2 +
 llvm/lib/CodeGen/MIRParser/MIRParser.cpp      |  74 ++++++++++--
 llvm/lib/CodeGen/MIRPrinter.cpp               |  33 ++++-
 .../SelectionDAG/ScheduleDAGSDNodes.cpp       |   4 +
 llvm/lib/MC/MCAsmStreamer.cpp                 |  14 +++
 llvm/lib/MC/MCObjectFileInfo.cpp              |   5 +
 llvm/lib/MC/MCParser/COFFAsmParser.cpp        |  34 ++++++
 llvm/lib/MC/MCStreamer.cpp                    |   4 +
 llvm/lib/MC/MCWinCOFFStreamer.cpp             | 114 ++++++++++++++++++
 llvm/lib/MC/WinCOFFObjectWriter.cpp           |  27 +++--
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  72 +++++++++++
 .../Target/AArch64/AArch64ISelLowering.cpp    |  14 ++-
 .../win-import-call-optimization-nocalls.ll   |  18 +++
 .../AArch64/win-import-call-optimization.ll   |  48 ++++++++
 .../CodeGen/MIR/AArch64/called-globals.mir    |  61 ++++++++++
 .../CodeGen/MIR/X86/call-site-info-error1.mir |   2 +-
 .../CodeGen/MIR/X86/call-site-info-error2.mir |   2 +-
 .../MC/AArch64/win-import-call-optimization.s |  72 +++++++++++
 llvm/test/MC/COFF/bad-parse.s                 |  13 ++
 25 files changed, 673 insertions(+), 38 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/win-import-call-optimization-nocalls.ll
 create mode 100644 llvm/test/CodeGen/AArch64/win-import-call-optimization.ll
 create mode 100644 llvm/test/CodeGen/MIR/AArch64/called-globals.mir
 create mode 100644 llvm/test/MC/AArch64/win-import-call-optimization.s
 create mode 100644 llvm/test/MC/COFF/bad-parse.s

diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index 09a6ca936fe1f4..dbad3469d047d2 100644
--- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -457,6 +457,16 @@ template <> struct ScalarTraits<FrameIndex> {
   static QuotingType mustQuote(StringRef S) { return needsQuotes(S); }
 };
 
+/// Identifies call instruction location in machine function.
+struct MachineInstrLoc {
+  unsigned BlockNum;
+  unsigned Offset;
+
+  bool operator==(const MachineInstrLoc &Other) const {
+    return BlockNum == Other.BlockNum && Offset == Other.Offset;
+  }
+};
+
 /// Serializable representation of CallSiteInfo.
 struct CallSiteInfo {
   // Representation of call argument and register which is used to
@@ -470,16 +480,6 @@ struct CallSiteInfo {
     }
   };
 
-  /// Identifies call instruction location in machine function.
-  struct MachineInstrLoc {
-    unsigned BlockNum;
-    unsigned Offset;
-
-    bool operator==(const MachineInstrLoc &Other) const {
-      return BlockNum == Other.BlockNum && Offset == Other.Offset;
-    }
-  };
-
   MachineInstrLoc CallLocation;
   std::vector<ArgRegPair> ArgForwardingRegs;
 
@@ -595,6 +595,26 @@ template <> struct MappingTraits<MachineJumpTable::Entry> {
   }
 };
 
+struct CalledGlobal {
+  MachineInstrLoc CallSite;
+  StringValue Callee;
+  unsigned Flags;
+
+  bool operator==(const CalledGlobal &Other) const {
+    return CallSite == Other.CallSite && Callee == Other.Callee &&
+           Flags == Other.Flags;
+  }
+};
+
+template <> struct MappingTraits<CalledGlobal> {
+  static void mapping(IO &YamlIO, CalledGlobal &CG) {
+    YamlIO.mapRequired("bb", CG.CallSite.BlockNum);
+    YamlIO.mapRequired("offset", CG.CallSite.Offset);
+    YamlIO.mapRequired("callee", CG.Callee);
+    YamlIO.mapRequired("flags", CG.Flags);
+  }
+};
+
 } // end namespace yaml
 } // end namespace llvm
 
@@ -606,6 +626,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::FixedMachineStackObject)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::CallSiteInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineConstantPoolValue)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineJumpTable::Entry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::CalledGlobal)
 
 namespace llvm {
 namespace yaml {
@@ -764,6 +785,7 @@ struct MachineFunction {
   std::vector<DebugValueSubstitution> DebugValueSubstitutions;
   MachineJumpTable JumpTableInfo;
   std::vector<StringValue> MachineMetadataNodes;
+  std::vector<CalledGlobal> CalledGlobals;
   BlockStringValue Body;
 };
 
@@ -822,6 +844,9 @@ template <> struct MappingTraits<MachineFunction> {
     if (!YamlIO.outputting() || !MF.MachineMetadataNodes.empty())
       YamlIO.mapOptional("machineMetadataNodes", MF.MachineMetadataNodes,
                          std::vector<StringValue>());
+    if (!YamlIO.outputting() || !MF.CalledGlobals.empty())
+      YamlIO.mapOptional("calledGlobals", MF.CalledGlobals,
+                         std::vector<CalledGlobal>());
     YamlIO.mapOptional("body", MF.Body, BlockStringValue());
   }
 };
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index d696add8a1af53..282aee2a69c4d9 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -354,6 +354,11 @@ class LLVM_ABI MachineFunction {
   /// a table of valid targets for Windows EHCont Guard.
   std::vector<MCSymbol *> CatchretTargets;
 
+  /// Mapping of call instruction to the global value and target flags that it
+  /// calls, if applicable.
+  DenseMap<const MachineInstr *, std::pair<const GlobalValue *, unsigned>>
+      CalledGlobalsMap;
+
   /// \name Exception Handling
   /// \{
 
@@ -1182,6 +1187,26 @@ class LLVM_ABI MachineFunction {
     CatchretTargets.push_back(Target);
   }
 
+  /// Tries to get the global and target flags for a call site, if the
+  /// instruction is a call to a global.
+  std::pair<const GlobalValue *, unsigned>
+  tryGetCalledGlobal(const MachineInstr *MI) const {
+    return CalledGlobalsMap.lookup(MI);
+  }
+
+  /// Notes the global and target flags for a call site.
+  void addCalledGlobal(const MachineInstr *MI,
+                       std::pair<const GlobalValue *, unsigned> Details) {
+    assert(MI && "MI must not be null");
+    assert(Details.first && "Global must not be null");
+    CalledGlobalsMap.insert({MI, Details});
+  }
+
+  /// Iterates over the full set of call sites and their associated globals.
+  auto getCalledGlobals() const {
+    return llvm::make_range(CalledGlobalsMap.begin(), CalledGlobalsMap.end());
+  }
+
   /// \name Exception Handling
   /// \{
 
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index ff7caec41855fd..b31ad11c3ee0ee 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -293,6 +293,7 @@ class SelectionDAG {
     MDNode *HeapAllocSite = nullptr;
     MDNode *PCSections = nullptr;
     MDNode *MMRA = nullptr;
+    std::pair<const GlobalValue *, unsigned> CalledGlobal{};
     bool NoMerge = false;
   };
   /// Out-of-line extra information for SDNodes.
@@ -2373,6 +2374,19 @@ class SelectionDAG {
     auto It = SDEI.find(Node);
     return It != SDEI.end() ? It->second.MMRA : nullptr;
   }
+  /// Set CalledGlobal to be associated with Node.
+  void addCalledGlobal(const SDNode *Node, const GlobalValue *GV,
+                       unsigned OpFlags) {
+    SDEI[Node].CalledGlobal = {GV, OpFlags};
+  }
+  /// Return CalledGlobal associated with Node, or std::nullopt if none exists.
+  std::optional<std::pair<const GlobalValue *, unsigned>>
+  getCalledGlobal(const SDNode *Node) {
+    auto I = SDEI.find(Node);
+    return I != SDEI.end()
+               ? std::make_optional(std::move(I->second).CalledGlobal)
+               : std::nullopt;
+  }
   /// Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
   void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge) {
     if (NoMerge)
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index e2a2c84e47910b..fb575fe721015c 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -73,6 +73,10 @@ class MCObjectFileInfo {
   /// to emit them into.
   MCSection *CompactUnwindSection = nullptr;
 
+  /// If import call optimization is supported by the target, this is the
+  /// section to emit import call data to.
+  MCSection *ImportCallSection = nullptr;
+
   // Dwarf sections for debug info.  If a target supports debug info, these must
   // be set.
   MCSection *DwarfAbbrevSection = nullptr;
@@ -269,6 +273,7 @@ class MCObjectFileInfo {
   MCSection *getBSSSection() const { return BSSSection; }
   MCSection *getReadOnlySection() const { return ReadOnlySection; }
   MCSection *getLSDASection() const { return LSDASection; }
+  MCSection *getImportCallSection() const { return ImportCallSection; }
   MCSection *getCompactUnwindSection() const { return CompactUnwindSection; }
   MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
   MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 21da4dac4872b4..558b14cebfd3d1 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -569,6 +569,14 @@ class MCStreamer {
   /// \param Symbol - Symbol the image relative relocation should point to.
   virtual void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset);
 
+  /// Emits the physical number of the section containing the given symbol as
+  /// assigned during object writing (i.e., this is not a runtime relocation).
+  virtual void emitCOFFSecNumber(MCSymbol const *Symbol);
+
+  /// Emits the offset of the symbol from the beginning of the section during
+  /// object writing (i.e., this is not a runtime relocation).
+  virtual void emitCOFFSecOffset(MCSymbol const *Symbol);
+
   /// Emits an lcomm directive with XCOFF csect information.
   ///
   /// \param LabelSym - Label on the block of storage.
diff --git a/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h b/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
index a4ede61e45099d..13d8c7d060c9ef 100644
--- a/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -72,6 +72,7 @@ class WinCOFFObjectWriter final : public MCObjectWriter {
                         const MCFixup &Fixup, MCValue Target,
                         uint64_t &FixedValue) override;
   uint64_t writeObject(MCAssembler &Asm) override;
+  int getSectionNumber(const MCSection &Section) const;
 };
 
 /// Construct a new Win COFF writer instance.
diff --git a/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
index 5c39d80538944b..2425abe51e6dd9 100644
--- a/llvm/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
@@ -58,6 +58,8 @@ class MCWinCOFFStreamer : public MCObjectStreamer {
   void emitCOFFSectionIndex(MCSymbol const *Symbol) override;
   void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
   void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
+  void emitCOFFSecNumber(MCSymbol const *Symbol) override;
+  void emitCOFFSecOffset(MCSymbol const *Symbol) override;
   void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                         Align ByteAlignment) override;
   void emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index e2543f883f91ce..de2fe925c2d5c9 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -158,6 +158,9 @@ class MIRParserImpl {
                                  MachineFunction &MF,
                                  const yaml::MachineFunction &YMF);
 
+  bool parseCalledGlobals(PerFunctionMIParsingState &PFS, MachineFunction &MF,
+                          const yaml::MachineFunction &YMF);
+
 private:
   bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node,
                    const yaml::StringValue &Source);
@@ -183,6 +186,9 @@ class MIRParserImpl {
 
   void setupDebugValueTracking(MachineFunction &MF,
     PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF);
+
+  bool parseMachineInst(MachineFunction &MF, yaml::MachineInstrLoc MILoc,
+                        MachineInstr const *&MI);
 };
 
 } // end namespace llvm
@@ -457,24 +463,34 @@ bool MIRParserImpl::computeFunctionProperties(
   return false;
 }
 
+bool MIRParserImpl::parseMachineInst(MachineFunction &MF,
+                                     yaml::MachineInstrLoc MILoc,
+                                     MachineInstr const *&MI) {
+  if (MILoc.BlockNum >= MF.size()) {
+    return error(Twine(MF.getName()) +
+                 Twine(" instruction block out of range.") +
+                 " Unable to reference bb:" + Twine(MILoc.BlockNum));
+  }
+  auto BB = std::next(MF.begin(), MILoc.BlockNum);
+  if (MILoc.Offset >= BB->size())
+    return error(
+        Twine(MF.getName()) + Twine(" instruction offset out of range.") +
+        " Unable to reference instruction at bb: " + Twine(MILoc.BlockNum) +
+        " at offset:" + Twine(MILoc.Offset));
+  MI = &*std::next(BB->instr_begin(), MILoc.Offset);
+  return false;
+}
+
 bool MIRParserImpl::initializeCallSiteInfo(
     PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) {
   MachineFunction &MF = PFS.MF;
   SMDiagnostic Error;
   const TargetMachine &TM = MF.getTarget();
   for (auto &YamlCSInfo : YamlMF.CallSitesInfo) {
-    yaml::CallSiteInfo::MachineInstrLoc MILoc = YamlCSInfo.CallLocation;
-    if (MILoc.BlockNum >= MF.size())
-      return error(Twine(MF.getName()) +
-                   Twine(" call instruction block out of range.") +
-                   " Unable to reference bb:" + Twine(MILoc.BlockNum));
-    auto CallB = std::next(MF.begin(), MILoc.BlockNum);
-    if (MILoc.Offset >= CallB->size())
-      return error(Twine(MF.getName()) +
-                   Twine(" call instruction offset out of range.") +
-                   " Unable to reference instruction at bb: " +
-                   Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset));
-    auto CallI = std::next(CallB->instr_begin(), MILoc.Offset);
+    yaml::MachineInstrLoc MILoc = YamlCSInfo.CallLocation;
+    const MachineInstr *CallI;
+    if (parseMachineInst(MF, MILoc, CallI))
+      return true;
     if (!CallI->isCall(MachineInstr::IgnoreBundle))
       return error(Twine(MF.getName()) +
                    Twine(" call site info should reference call "
@@ -641,6 +657,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
   if (initializeCallSiteInfo(PFS, YamlMF))
     return true;
 
+  if (parseCalledGlobals(PFS, MF, YamlMF))
+    return true;
+
   setupDebugValueTracking(MF, PFS, YamlMF);
 
   MF.getSubtarget().mirFileLoaded(MF);
@@ -1111,6 +1130,37 @@ bool MIRParserImpl::parseMachineMetadataNodes(
   return false;
 }
 
+bool MIRParserImpl::parseCalledGlobals(PerFunctionMIParsingState &PFS,
+                                       MachineFunction &MF,
+                                       const yaml::MachineFunction &YMF) {
+  Function &F = MF.getFunction();
+  for (const auto &YamlCG : YMF.CalledGlobals) {
+    yaml::MachineInstrLoc MILoc = YamlCG.CallSite;
+    const MachineInstr *CallI;
+    if (parseMachineInst(MF, MILoc, CallI))
+      return true;
+    if (!CallI->isCall(MachineInstr::IgnoreBundle))
+      return error(Twine(MF.getName()) +
+                   Twine(" called global should reference call "
+                         "instruction. Instruction at bb:") +
+                   Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset) +
+                   " is not a call instruction");
+
+    auto Callee =
+        F.getParent()->getValueSymbolTable().lookup(YamlCG.Callee.Value);
+    if (!Callee)
+      return error(YamlCG.Callee.SourceRange.Start,
+                   "use of undefined global '" + YamlCG.Callee.Value + "'");
+    if (!isa<GlobalValue>(Callee))
+      return error(YamlCG.Callee.SourceRange.Start,
+                   "use of non-global value '" + YamlCG.Callee.Value + "'");
+
+    MF.addCalledGlobal(CallI, {cast<GlobalValue>(Callee), YamlCG.Flags});
+  }
+
+  return false;
+}
+
 SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
                                                  SMRange SourceRange) {
   assert(SourceRange.isValid() && "Invalid source range");
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index c8f6341c1224d2..b8e41cc789856e 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -133,6 +133,9 @@ class MIRPrinter {
   void convertMachineMetadataNodes(yaml::MachineFunction &YMF,
                                    const MachineFunction &MF,
                                    MachineModuleSlotTracker &MST);
+  void convertCalledGlobals(yaml::MachineFunction &YMF,
+                            const MachineFunction &MF,
+                            MachineModuleSlotTracker &MST);
 
 private:
   void initRegisterMaskIds(const MachineFunction &MF);
@@ -269,6 +272,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
   // function.
   convertMachineMetadataNodes(YamlMF, MF, MST);
 
+  convertCalledGlobals(YamlMF, MF, MST);
+
   yaml::Output Out(OS);
   if (!SimplifyMIR)
       Out.setWriteDefaultValues(true);
@@ -555,7 +560,7 @@ void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF,
   const auto *TRI = MF.getSubtarget().getRegisterInfo();
   for (auto CSInfo : MF.getCallSitesInfo()) {
     yaml::CallSiteInfo YmlCS;
-    yaml::CallSiteInfo::MachineInstrLoc CallLocation;
+    yaml::MachineInstrLoc CallLocation;
 
     // Prepare instruction position.
     MachineBasicBlock::const_instr_iterator CallI = CSInfo.first->getIterator();
@@ -596,6 +601,32 @@ void MIRPrinter::convertMachineMetadataNodes(yaml::MachineFunction &YMF,
   }
 }
 
+void MIRPrinter::convertCalledGlobals(yaml::MachineFunction &YMF,
+                                      const MachineFunction &MF,
+                                      MachineModuleSlotTracker &MST) {
+  for (const auto [CallInst, CG] : MF.getCalledGlobals()) {
+    // If the call instruction was dropped, then we don't need to print it.
+    auto BB = CallInst->getParent();
+    if (BB) {
+      yaml::MachineInstrLoc CallSite;
+      CallSite.BlockNum = CallInst->getParent()->getNumber();
+      CallSite.Offset = std::distance(CallInst->getParent()->instr_begin(),
+                                      CallInst->getIterator());
+
+      yaml::CalledGlobal YamlCG{CallSite, CG.first->getName().str(), CG.second};
+      YMF.CalledGlobals.push_back(YamlCG);
+    }
+  }
+
+  // Sort by position of call instructions.
+  llvm::sort(YMF.CalledGlobals.begin(), YMF.CalledGlobals.end(),
+             [](yaml::CalledGlobal A, yaml::CalledGlobal B) {
+               if (A.CallSite.BlockNum == B.CallSite.BlockNum)
+                 return A.CallSite.Offset < B.CallSite.Offset;
+               return A.CallSite.BlockNum < B.CallSite.BlockNum;
+             });
+}
+
 void MIRPrinter::convert(yaml::MachineFunction &MF,
                          const MachineConstantPool &ConstantPool) {
   unsigned ID = 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 26fc75c0578ec2..aad72c248b0f6b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -908,6 +908,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
         It->setMMRAMetadata(MF, MMRA);
     }
 
+    if (auto CalledGlobal = DAG->getCalledGlobal(Node))
+      if (CalledGlobal->first)
+        MF.addCalledGlobal(MI, *CalledGlobal);
+
     return MI;
   };
 
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 01fe11ed205017..dd8058c6d5cd80 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -209,6 +209,8 @@ class MCAsmStreamer final : public MCStreamer {
   void emitCOFFSectionIndex(MCSymbol const *Symbol) override;
   void emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
   void emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) override;
+  void emitCOFFSecNumber(MCSymbol const *Symbol) override;
+  void emitCOFFSecOffset(MCSymbol const *Symbol) override;
   void emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym, uint64_t Size,
                                   MCSymbol *CsectSym, Align Alignment) override;
   void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol,
@@ -893,6 +895,18 @@ void MCAsmStreamer::emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {
   EmitEOL();
 }
 
+void MCAsmStreamer::emitCOFFSecNumber(MCSymbol const *Symbol) {
+  OS << "\t.secnum\t";
+  Symbol->print(OS, MAI);
+  EmitEOL();
+}
+
+void MCAsmStreamer::emitCOFFSecOffset(MCSymbol const *Symbol) {
+  OS << "\t.secoffset\t";
+  Symbol->print(OS, MAI);
+  EmitEOL();
+}
+
 // We need an XCOFF-specific version of this directive as the AIX syntax
 // requires a QualName argument identifying the csect name and storage mapping
 // class to appear before the alignment if we are specifying it.
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index f37e138edc36b1..150e38a94db6a6 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -596,6 +596,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
                                           COFF::IMAGE_SCN_MEM_READ);
   }
 
+  if (T.getArch() == Triple::aarch64) {
+    ImportCallSection =
+        Ctx->getCOFFSection(".impcall", COFF::IMAGE_SCN_LNK_INFO);
+  }
+
   // Debug info.
   COFFDebugSymbolsSection =
       Ctx->getCOFFSection(".debug$S", (COFF::IMAGE_SCN_MEM_DISCARDABLE |
diff --git a/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index 4d95a720852835..dd5ce9964a194c 100644
--- a/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -70,6 +70,8 @@ class COFFAsmParser : public MCAsmParserExtension {
     addDirectiveHandler<&COFFAsmParser::parseDirectiveSymbolAttribute>(
         ".weak_anti_dep");
     addDirectiveHandler<&COFFAsmParser::parseDirectiveCGProfile>(".cg_profile");
+    addDirectiveHandler<&COFFAsmParser::parseDirectiveSecNum>(".secnum");
+    addDirectiveHandler<&COFFAsmParser::parseDirectiveSecOffset>(".secoffset");
 
     // Win64 EH directives.
     addDirectiveHandler<&COFFAsmParser::parseSEHDirectiveStartProc>(
@@ -126,6 +128,8 @@ class COFFAsmParser : public MCAsmParserExtension {
   bool parseDirectiveLinkOnce(StringRef, SMLoc);
   bool parseDirectiveRVA(StringRef, SMLoc);
   bool parseDirectiveCGProfile(StringRef, SMLoc);
+  bool parseDirectiveSecNum(StringRef, SMLoc);
+  bool parseDirectiveSecOffset(StringRef, SMLoc);
 
   // Win64 EH directives.
   bool parseSEHDirectiveStartProc(StringRef, SMLoc);
@@ -577,6 +581,36 @@ bool COFFAsmParser::parseDirectiveSymIdx(StringRef, SMLoc) {
   return false;
 }
 
+bool COFFAsmParser::parseDirectiveSecNum(StringRef, SMLoc) {
+  StringRef SymbolID;
+  if (getParser().parseIdentifier(SymbolID))
+    return TokError("expected identifier in directive");
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
+
+  Lex();
+  getStreamer().emitCOFFSecNumber(Symbol);
+  return false;
+}
+
+bool COFFAsmParser::parseDirectiveSecOffset(StringRef, SMLoc) {
+  StringRef SymbolID;
+  if (getParser().parseIdentifier(SymbolID))
+    return TokError("expected identifier in directive");
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
+
+  Lex();
+  getStreamer().emitCOFFSecOffset(Symbol);
+  return false;
+}
+
 /// ::= [ identifier ]
 bool COFFAsmParser::parseCOMDATType(COFF::COMDATType &Type) {
   StringRef TypeId = getTok().getIdentifier();
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index ccf65df150e786..e690723c0e5024 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -1023,6 +1023,10 @@ void MCStreamer::emitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {}
 
 void MCStreamer::emitCOFFImgRel32(MCSymbol const *Symbol, int64_t Offset) {}
 
+void MCStreamer::emitCOFFSecNumber(MCSymbol const *Symbol) {}
+
+void MCStreamer::emitCOFFSecOffset(MCSymbol const *Symbol) {}
+
 /// EmitRawText - If this file is backed by an assembly streamer, this dumps
 /// the specified string in the output .s file.  This capability is
 /// indicated by the hasRawTextSupport() predicate.
diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp
index 395d4db3103d78..8fd46bc8b02554 100644
--- a/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -29,6 +29,7 @@
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSymbolCOFF.h"
 #include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/MC/MCWinCOFFObjectWriter.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -43,6 +44,91 @@ using namespace llvm;
 
 #define DEBUG_TYPE "WinCOFFStreamer"
 
+/// MCExpr that represents the physical number for the section that contains
+/// a symbol.
+class MCCOFFSectionNumberTargetExpr final : public MCTargetExpr {
+  const MCSymbol &SectionSymbol;
+  const WinCOFFObjectWriter &Writer;
+
+  MCCOFFSectionNumberTargetExpr(const MCSymbol &SectionSymbol_,
+                                const WinCOFFObjectWriter &Writer_)
+      : SectionSymbol(SectionSymbol_), Writer(Writer_) {}
+
+public:
+  static MCCOFFSectionNumberTargetExpr *
+  create(const MCSymbol &SectionSymbol, const WinCOFFObjectWriter &Writer,
+         MCContext &Ctx) {
+    return new (Ctx) MCCOFFSectionNumberTargetExpr(SectionSymbol, Writer);
+  }
+
+  void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override {
+    OS << ":secnum:";
+    SectionSymbol.print(OS, MAI);
+  }
+
+  bool evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
+                                 const MCFixup *Fixup) const override {
+    auto sectionNumber = Writer.getSectionNumber(SectionSymbol.getSection());
+    assert(sectionNumber != 0 &&
+           "Containing section was not assigned a number");
+    Res = MCValue::get(sectionNumber);
+    return true;
+  }
+
+  void visitUsedExpr(MCStreamer &Streamer) const override {
+    // Contains no sub-expressions.
+  }
+
+  MCFragment *findAssociatedFragment() const override {
+    return SectionSymbol.getFragment();
+  }
+
+  void fixELFSymbolsInTLSFixups(MCAssembler &) const override {
+    llvm_unreachable("Not supported for ELF");
+  }
+};
+
+/// MCExpr that represents the offset to a symbol from the beginning of its
+/// section.
+class MCCOFFSectionOffsetTargetExpr final : public MCTargetExpr {
+  const MCSymbol &Symbol;
+
+  MCCOFFSectionOffsetTargetExpr(const MCSymbol &Symbol_) : Symbol(Symbol_) {}
+
+public:
+  static MCCOFFSectionOffsetTargetExpr *create(const MCSymbol &Symbol,
+                                               MCContext &Ctx) {
+    return new (Ctx) MCCOFFSectionOffsetTargetExpr(Symbol);
+  }
+
+  void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override {
+    OS << ":secoffset:";
+    Symbol.print(OS, MAI);
+  }
+
+  bool evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
+                                 const MCFixup *Fixup) const override {
+    uint64_t CallsiteOffset = 0;
+    if (!Asm->getSymbolOffset(Symbol, CallsiteOffset)) {
+      return true;
+    }
+    Res = MCValue::get(CallsiteOffset);
+    return true;
+  }
+
+  void visitUsedExpr(MCStreamer &Streamer) const override {
+    // Contains no sub-expressions.
+  }
+
+  MCFragment *findAssociatedFragment() const override {
+    return Symbol.getFragment();
+  }
+
+  void fixELFSymbolsInTLSFixups(MCAssembler &) const override {
+    llvm_unreachable("Not supported for ELF");
+  }
+};
+
 MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context,
                                      std::unique_ptr<MCAsmBackend> MAB,
                                      std::unique_ptr<MCCodeEmitter> CE,
@@ -280,6 +366,34 @@ void MCWinCOFFStreamer::emitCOFFImgRel32(const MCSymbol *Symbol,
   DF->appendContents(4, 0);
 }
 
+void MCWinCOFFStreamer::emitCOFFSecNumber(MCSymbol const *Symbol) {
+  visitUsedSymbol(*Symbol);
+  MCDataFragment *DF = getOrCreateDataFragment();
+  // Create the target expression for the section number.
+  const MCExpr *MCE = MCCOFFSectionNumberTargetExpr::create(
+      *Symbol, this->getWriter(), getContext());
+  // Build the relocation.
+  MCFixup Fixup = MCFixup::create(DF->getContents().size(), MCE, FK_Data_4);
+  // Record the relocation.
+  DF->getFixups().push_back(Fixup);
+  // Emit 4 bytes (zeros) to the object file.
+  DF->appendContents(4, 0);
+}
+
+void MCWinCOFFStreamer::emitCOFFSecOffset(MCSymbol const *Symbol) {
+  visitUsedSymbol(*Symbol);
+  MCDataFragment *DF = getOrCreateDataFragment();
+  // Create the target expression for the section offset.
+  const MCExpr *MCE =
+      MCCOFFSectionOffsetTargetExpr::create(*Symbol, getContext());
+  // Build the relocation.
+  MCFixup Fixup = MCFixup::create(DF->getContents().size(), MCE, FK_Data_4);
+  // Record the relocation.
+  DF->getFixups().push_back(Fixup);
+  // Emit 4 bytes (zeros) to the object file.
+  DF->appendContents(4, 0);
+}
+
 void MCWinCOFFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
                                          Align ByteAlignment) {
   auto *Symbol = cast<MCSymbolCOFF>(S);
diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 09d2b08e43050f..39e02d0522bcfb 100644
--- a/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -163,6 +163,7 @@ class llvm::WinCOFFWriter {
                         const MCFixup &Fixup, MCValue Target,
                         uint64_t &FixedValue);
   uint64_t writeObject(MCAssembler &Asm);
+  int getSectionNumber(const MCSection &Section) const;
 
 private:
   COFFSymbol *createSymbol(StringRef Name);
@@ -818,6 +819,15 @@ void WinCOFFWriter::executePostLayoutBinding(MCAssembler &Asm) {
       if (!Symbol.isTemporary() ||
           cast<MCSymbolCOFF>(Symbol).getClass() == COFF::IMAGE_SYM_CLASS_STATIC)
         defineSymbol(Asm, Symbol);
+
+  UseBigObj = Sections.size() > COFF::MaxNumberOfSections16;
+  Header.NumberOfSections = Sections.size();
+  Header.NumberOfSymbols = 0;
+  if (Sections.size() > INT32_MAX)
+    report_fatal_error(
+        "PE COFF object files can't have more than 2147483647 sections");
+
+  assignSectionNumbers();
 }
 
 void WinCOFFWriter::recordRelocation(MCAssembler &Asm,
@@ -980,16 +990,7 @@ static std::time_t getTime() {
 uint64_t WinCOFFWriter::writeObject(MCAssembler &Asm) {
   uint64_t StartOffset = W.OS.tell();
 
-  if (Sections.size() > INT32_MAX)
-    report_fatal_error(
-        "PE COFF object files can't have more than 2147483647 sections");
-
-  UseBigObj = Sections.size() > COFF::MaxNumberOfSections16;
-  Header.NumberOfSections = Sections.size();
-  Header.NumberOfSymbols = 0;
-
   setWeakDefaultNames();
-  assignSectionNumbers();
   if (Mode != DwoOnly)
     createFileSymbols(Asm);
 
@@ -1143,6 +1144,10 @@ uint64_t WinCOFFWriter::writeObject(MCAssembler &Asm) {
   return W.OS.tell() - StartOffset;
 }
 
+int WinCOFFWriter::getSectionNumber(const MCSection &Section) const {
+  return SectionMap.at(&Section)->Number;
+}
+
 //------------------------------------------------------------------------------
 // WinCOFFObjectWriter class implementation
 
@@ -1194,6 +1199,10 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm) {
   return TotalSize;
 }
 
+int WinCOFFObjectWriter::getSectionNumber(const MCSection &Section) const {
+  return ObjWriter->getSectionNumber(Section);
+}
+
 MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_)
     : Machine(Machine_) {}
 
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 9bec782ca8ce97..776702928ccada 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -24,6 +24,8 @@
 #include "MCTargetDesc/AArch64TargetStreamer.h"
 #include "TargetInfo/AArch64TargetInfo.h"
 #include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -77,6 +78,11 @@ static cl::opt<PtrauthCheckMode> PtrauthAuthChecks(
     cl::desc("Check pointer authentication auth/resign failures"),
     cl::init(Default));
 
+static cl::opt<bool> EnableImportCallOptimization(
+    "aarch64-win-import-call-optimization", cl::Hidden,
+    cl::desc("Enable import call optimization for AArch64 Windows"),
+    cl::init(false));
+
 #define DEBUG_TYPE "asm-printer"
 
 namespace {
@@ -89,6 +95,8 @@ class AArch64AsmPrinter : public AsmPrinter {
 #ifndef NDEBUG
   unsigned InstsEmitted;
 #endif
+  MapVector<MCSection *, std::vector<std::pair<MCSymbol *, MCSymbol *>>>
+      SectionToImportedFunctionCalls; // Insertion-ordered: stable output.
 
 public:
   AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
@@ -293,6 +301,11 @@ class AArch64AsmPrinter : public AsmPrinter {
                               MCSymbol *LazyPointer) override;
   void emitMachOIFuncStubHelperBody(Module &M, const GlobalIFunc &GI,
                                     MCSymbol *LazyPointer) override;
+
+  /// Checks if this instruction is part of a sequence that is eligible for import
+  /// call optimization and, if so, records it to be emitted in the import call
+  /// section.
+  void recordIfImportCall(const MachineInstr *BranchInst);
 };
 
 } // end anonymous namespace
@@ -921,6 +934,38 @@ void AArch64AsmPrinter::emitEndOfAsmFile(Module &M) {
   // Emit stack and fault map information.
   FM.serializeToFaultMapSection();
 
+  // If import call optimization is enabled, emit the appropriate section.
+  // We do this whether or not we recorded any import calls.
+  if (EnableImportCallOptimization && TT.isOSBinFormatCOFF()) {
+    OutStreamer->switchSection(getObjFileLowering().getImportCallSection());
+
+    // Section always starts with some magic.
+    constexpr char ImpCallMagic[12] = "Imp_Call_V1";
+    OutStreamer->emitBytes(StringRef{ImpCallMagic, sizeof(ImpCallMagic)});
+
+    // Layout of this section is:
+    // Per section that contains calls to imported functions:
+    //  uint32_t SectionSize: Size in bytes for information in this section.
+    //  uint32_t Section Number
+    //  Per call to imported function in section:
+    //    uint32_t Kind: the kind of imported function.
+    //    uint32_t BranchOffset: the offset of the branch instruction in its
+    //                            parent section.
+    //    uint32_t TargetSymbolId: the symbol id of the called function.
+    for (auto &[Section, CallsToImportedFuncs] :
+         SectionToImportedFunctionCalls) {
+      unsigned SectionSize =
+          sizeof(uint32_t) * (2 + 3 * CallsToImportedFuncs.size());
+      OutStreamer->emitInt32(SectionSize);
+      OutStreamer->emitCOFFSecNumber(Section->getBeginSymbol());
+      for (auto &[CallsiteSymbol, CalledSymbol] : CallsToImportedFuncs) {
+        // Kind is always IMAGE_REL_ARM64_DYNAMIC_IMPORT_CALL (0x13).
+        OutStreamer->emitInt32(0x13);
+        OutStreamer->emitCOFFSecOffset(CallsiteSymbol);
+        OutStreamer->emitCOFFSymbolIndex(CalledSymbol);
+      }
+    }
+  }
 }
 
 void AArch64AsmPrinter::emitLOHs() {
@@ -2694,6 +2739,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
   case AArch64::TCRETURNriALL: {
     emitPtrauthTailCallHardening(MI);
 
+    recordIfImportCall(MI);
     MCInst TmpInst;
     TmpInst.setOpcode(AArch64::BR);
     TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
@@ -2705,6 +2751,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
 
     MCOperand Dest;
     MCInstLowering.lowerOperand(MI->getOperand(0), Dest);
+    recordIfImportCall(MI);
     MCInst TmpInst;
     TmpInst.setOpcode(AArch64::B);
     TmpInst.addOperand(Dest);
@@ -3035,6 +3082,14 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
     TS->emitARM64WinCFISaveAnyRegQPX(MI->getOperand(0).getImm(),
                                      -MI->getOperand(2).getImm());
     return;
+
+  case AArch64::BLR:
+  case AArch64::BR:
+    recordIfImportCall(MI);
+    MCInst TmpInst;
+    MCInstLowering.Lower(MI, TmpInst);
+    EmitToStreamer(*OutStreamer, TmpInst);
+    return;
   }
 
   // Finally, do the automated lowerings for everything else.
@@ -3043,6 +3098,23 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, TmpInst);
 }
 
+void AArch64AsmPrinter::recordIfImportCall(
+    const llvm::MachineInstr *BranchInst) {
+  // Nothing to record unless the option is on and the target is COFF.
+  if (!(EnableImportCallOptimization &&
+        TM.getTargetTriple().isOSBinFormatCOFF()))
+    return;
+  auto [GV, OpFlags] = BranchInst->getMF()->tryGetCalledGlobal(BranchInst);
+  if (!GV || !GV->hasDLLImportStorageClass())
+    return;
+  // Label the branch, then file (label, callee) under the current section.
+  auto *SiteLabel = MMI->getContext().createNamedTempSymbol("impcall");
+  OutStreamer->emitLabel(SiteLabel);
+  auto *Callee = MCInstLowering.GetGlobalValueSymbol(GV, OpFlags);
+  auto *Section = OutStreamer->getCurrentSectionOnly();
+  SectionToImportedFunctionCalls[Section].push_back({SiteLabel, Callee});
+}
+
 void AArch64AsmPrinter::emitMachOIFuncStubBody(Module &M, const GlobalIFunc &GI,
                                                MCSymbol *LazyPointer) {
   // _ifunc:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 070163a5fb297c..d7c4d33db177e0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9386,12 +9386,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   // node so that legalize doesn't hack it.
+  const GlobalValue *CalledGlobal = nullptr;
+  unsigned OpFlags = 0;
   if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-    auto GV = G->getGlobal();
-    unsigned OpFlags =
-        Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
+    CalledGlobal = G->getGlobal();
+    OpFlags = Subtarget->classifyGlobalFunctionReference(CalledGlobal,
+                                                         getTargetMachine());
     if (OpFlags & AArch64II::MO_GOT) {
-      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
+      Callee = DAG.getTargetGlobalAddress(CalledGlobal, DL, PtrVT, 0, OpFlags);
       Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
     } else {
       const GlobalValue *GV = G->getGlobal();
@@ -9511,6 +9513,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
     DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
+    if (CalledGlobal)
+      DAG.addCalledGlobal(Ret.getNode(), CalledGlobal, OpFlags);
     return Ret;
   }
 
@@ -9522,6 +9526,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
   InGlue = Chain.getValue(1);
   DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
+  if (CalledGlobal)
+    DAG.addCalledGlobal(Chain.getNode(), CalledGlobal, OpFlags);
 
   uint64_t CalleePopBytes =
       DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
diff --git a/llvm/test/CodeGen/AArch64/win-import-call-optimization-nocalls.ll b/llvm/test/CodeGen/AArch64/win-import-call-optimization-nocalls.ll
new file mode 100644
index 00000000000000..81d6d6369dcbf4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/win-import-call-optimization-nocalls.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=aarch64-pc-windows-msvc -aarch64-win-import-call-optimization < %s | FileCheck %s
+
+define dso_local void @normal_call() local_unnamed_addr {
+entry:
+  call void @a()
+  ret void
+}
+; CHECK-LABEL:  normal_call:
+; CHECK:        bl a
+
+declare void @a() local_unnamed_addr
+
+; Even if there are no calls to imported functions, we still need to emit the
+; .impcall section.
+
+; CHECK-LABEL:  .section   .impcall,"yi"
+; CHECK-NEXT:   .asciz  "Imp_Call_V1"
+; CHECK-NOT:    .secnum
diff --git a/llvm/test/CodeGen/AArch64/win-import-call-optimization.ll b/llvm/test/CodeGen/AArch64/win-import-call-optimization.ll
new file mode 100644
index 00000000000000..6bb118ba1e1596
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/win-import-call-optimization.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=aarch64-pc-windows-msvc -aarch64-win-import-call-optimization < %s | FileCheck %s --check-prefix=CHECK-ENABLED
+; RUN: llc -mtriple=aarch64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK-DISABLED
+
+; CHECK-DISABLED-NOT: .section        .impcall
+
+define dso_local void @normal_call() local_unnamed_addr section "nc_sect" {
+entry:
+  call void @a()
+  call void @a()
+  ret void
+}
+; CHECK-ENABLED-LABEL:  normal_call:
+; CHECK-ENABLED:        adrp    [[ADRPREG:x[0-9]+]], __imp_a
+; CHECK-ENABLED-NEXT:   ldr     [[LDRREG:x[0-9]+]], [[[ADRPREG]], :lo12:__imp_a]
+; CHECK-ENABLED-NEXT:   .Limpcall0:
+; CHECK-ENABLED-NEXT:   blr     [[LDRREG]]
+; CHECK-ENABLED-NEXT:   .Limpcall1:
+; CHECK-ENABLED-NEXT:   blr     [[LDRREG]]
+
+define dso_local void @tail_call() local_unnamed_addr section "tc_sect" {
+entry:
+  tail call void @b()
+  ret void
+}
+; CHECK-ENABLED-LABEL:  tail_call:
+; CHECK-ENABLED:        adrp    [[ADRPREG:x[0-9]+]], __imp_b
+; CHECK-ENABLED-NEXT:   ldr     [[LDRREG:x[0-9]+]], [[[ADRPREG]], :lo12:__imp_b]
+; CHECK-ENABLED-NEXT:   .Limpcall2:
+; CHECK-ENABLED-NEXT:   br      [[LDRREG]]
+
+declare dllimport void @a() local_unnamed_addr
+declare dllimport void @b() local_unnamed_addr
+
+; CHECK-ENABLED-LABEL:  .section   .impcall,"yi"
+; CHECK-ENABLED-NEXT:   .asciz  "Imp_Call_V1"
+; CHECK-ENABLED-NEXT:   .word   32
+; CHECK-ENABLED-NEXT:   .secnum nc_sect
+; CHECK-ENABLED-NEXT:   .word   19
+; CHECK-ENABLED-NEXT:   .secoffset      .Limpcall0
+; CHECK-ENABLED-NEXT:   .symidx __imp_a
+; CHECK-ENABLED-NEXT:   .word   19
+; CHECK-ENABLED-NEXT:   .secoffset      .Limpcall1
+; CHECK-ENABLED-NEXT:   .symidx __imp_a
+; CHECK-ENABLED-NEXT:   .word   20
+; CHECK-ENABLED-NEXT:   .secnum tc_sect
+; CHECK-ENABLED-NEXT:   .word   19
+; CHECK-ENABLED-NEXT:   .secoffset      .Limpcall2
+; CHECK-ENABLED-NEXT:   .symidx __imp_b
diff --git a/llvm/test/CodeGen/MIR/AArch64/called-globals.mir b/llvm/test/CodeGen/MIR/AArch64/called-globals.mir
new file mode 100644
index 00000000000000..cf0f0a23e2d910
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AArch64/called-globals.mir
@@ -0,0 +1,61 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass none -o - %s | FileCheck %s
+
+--- |
+  declare dllimport void @callee_func() local_unnamed_addr
+
+  define dso_local void @caller() local_unnamed_addr {
+  entry:
+    call void @callee_func()
+    call void @callee_func()
+    ret void
+  }
+...
+---
+name:            caller
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '$x19', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+calledGlobals:
+  - bb:              0
+    offset:          7
+    callee:          callee_func
+    flags:           144
+  - bb:              0
+    offset:          8
+    callee:          callee_func
+    flags:           144
+body:             |
+  bb.0.entry:
+    liveins: $x19, $lr
+
+    early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -16 :: (store (s64) into %stack.1)
+    frame-setup SEH_SaveReg_X 19, -16
+    frame-setup STRXui killed $lr, $sp, 1 :: (store (s64) into %stack.0)
+    frame-setup SEH_SaveReg 30, 8
+    frame-setup SEH_PrologEnd
+    $x19 = ADRP target-flags(aarch64-page, aarch64-got, aarch64-dllimport) @callee_func
+    renamable $x19 = LDRXui killed $x19, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc, aarch64-dllimport) @callee_func
+    BLR renamable $x19, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    BLR killed renamable $x19, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    frame-destroy SEH_EpilogStart
+    $lr = frame-destroy LDRXui $sp, 1 :: (load (s64) from %stack.0)
+    frame-destroy SEH_SaveReg 30, 8
+    early-clobber $sp, $x19 = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
+    frame-destroy SEH_SaveReg_X 19, -16
+    frame-destroy SEH_EpilogEnd
+    RET undef $lr
+...
+
+# CHECK-LABEL: calledGlobals:
+# CHECK-NEXT:  - bb:              0
+# CHECK-NEXT:    offset:          7
+# CHECK-NEXT:    callee:          callee_func
+# CHECK-NEXT:    flags:           144
+# CHECK-NEXT:  - bb:              0
+# CHECK-NEXT:    offset:          8
+# CHECK-NEXT:    callee:          callee_func
+# CHECK-NEXT:    flags:           144
diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-error1.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-error1.mir
index 096a80f77dbb65..e4dab779216a88 100644
--- a/llvm/test/CodeGen/MIR/X86/call-site-info-error1.mir
+++ b/llvm/test/CodeGen/MIR/X86/call-site-info-error1.mir
@@ -1,5 +1,5 @@
 # RUN: not llc -mtriple=x86_64-- -run-pass none -debug-entry-values %s -o - 2>&1 | FileCheck %s
-# CHECK: baa call instruction block out of range. Unable to reference bb:1
+# CHECK: baa instruction block out of range. Unable to reference bb:1
 --- |
   define dso_local i32 @baa(i32 %a) local_unnamed_addr {
   entry:
diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-error2.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-error2.mir
index bd5b2451a8d768..183610b326eeb4 100644
--- a/llvm/test/CodeGen/MIR/X86/call-site-info-error2.mir
+++ b/llvm/test/CodeGen/MIR/X86/call-site-info-error2.mir
@@ -1,5 +1,5 @@
 # RUN: not llc -mtriple=x86_64-- -run-pass none -debug-entry-values %s -o - 2>&1 | FileCheck %s
-# CHECK: baa call instruction offset out of range. Unable to reference instruction at bb: 0 at offset:1
+# CHECK: baa instruction offset out of range. Unable to reference instruction at bb: 0 at offset:1
 --- |
   define dso_local i32 @baa(i32 %a) local_unnamed_addr {
   entry:
diff --git a/llvm/test/MC/AArch64/win-import-call-optimization.s b/llvm/test/MC/AArch64/win-import-call-optimization.s
new file mode 100644
index 00000000000000..f26e17b9b62cc0
--- /dev/null
+++ b/llvm/test/MC/AArch64/win-import-call-optimization.s
@@ -0,0 +1,72 @@
+// RUN: llvm-mc -triple aarch64-windows-msvc -filetype obj -o %t.obj %s
+// RUN: llvm-readobj --sections --sd --relocs %t.obj | FileCheck %s
+
+.section        nc_sect,"xr"
+normal_call:
+  str     x30, [sp, #-16]!                // 8-byte Folded Spill
+  adrp    x8, __imp_a
+  ldr     x8, [x8, :lo12:__imp_a]
+.Limpcall0:
+  blr     x8
+  ldr     x30, [sp], #16                  // 8-byte Folded Reload
+  ret
+
+.section        tc_sect,"xr"
+tail_call:
+  adrp    x8, __imp_b
+  ldr     x8, [x8, :lo12:__imp_b]
+.Limpcall1:
+  br     x8
+
+.section        .impcall,"yi"
+.asciz  "Imp_Call_V1"
+.word   20
+.secnum nc_sect
+.word   19
+.secoffset      .Limpcall0
+.symidx __imp_a
+.word   20
+.secnum tc_sect
+.word   19
+.secoffset      .Limpcall1
+.symidx __imp_b
+
+// CHECK-LABEL: Name: .impcall (2E 69 6D 70 63 61 6C 6C)
+// CHECK-NEXT:  VirtualSize: 0x0
+// CHECK-NEXT:  VirtualAddress: 0x0
+// CHECK-NEXT:  RawDataSize: 52
+// CHECK-NEXT:  PointerToRawData: 0x150
+// CHECK-NEXT:  PointerToRelocations: 0x0
+// CHECK-NEXT:  PointerToLineNumbers: 0x0
+// CHECK-NEXT:  RelocationCount: 0
+// CHECK-NEXT:  LineNumberCount: 0
+// CHECK-NEXT:  Characteristics [
+// CHECK-NEXT:    IMAGE_SCN_ALIGN_4BYTES
+// CHECK-NEXT:    IMAGE_SCN_LNK_INFO
+// CHECK-NEXT:  ]
+// CHECK-NEXT:  SectionData (
+// CHECK-NEXT:    0000: 496D705F 43616C6C 5F563100 14000000  |Imp_Call_V1.....|
+// CHECK-NEXT:    0010:
+// CHECK-SAME:    [[#%.2X,NCSECT:]]000000
+// CHECK-SAME:    13000000
+// CHECK-SAME:    [[#%.2X,NCOFFSET:]]000000
+// CHECK-SAME:    [[#%.2X,NCSYM:]]000000
+// CHECK-NEXT:    0020:
+// CHECK-SAME:    14000000
+// CHECK-SAME:    [[#%.2X,TCSECT:]]000000
+// CHECK-SAME:    13000000
+// CHECK-SAME:    [[#%.2X,TCOFFSET:]]000000
+// CHECK-NEXT:    0030:
+// CHECK-SAME:    [[#%.2X,TCSYM:]]000000
+// CHECK-NEXT:  )
+
+// CHECK-LABEL: Relocations [
+// CHECK-NEXT:     Section ([[#%u,NCSECT]]) nc_sect {
+// CHECK-NEXT:       0x[[#%x,NCOFFSET - 8]] IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_a ([[#%u,NCSYM]])
+// CHECK-NEXT:       0x[[#%x,NCOFFSET - 4]] IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_a ([[#%u,NCSYM]])
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Section ([[#%u,TCSECT]]) tc_sect {
+// CHECK-NEXT:       0x[[#%x,TCOFFSET - 8]] IMAGE_REL_ARM64_PAGEBASE_REL21 __imp_b ([[#%u,TCSYM]])
+// CHECK-NEXT:       0x[[#%x,TCOFFSET - 4]] IMAGE_REL_ARM64_PAGEOFFSET_12L __imp_b ([[#%u,TCSYM]])
+// CHECK-NEXT:     }
+// CHECK-NEXT:   ]
diff --git a/llvm/test/MC/COFF/bad-parse.s b/llvm/test/MC/COFF/bad-parse.s
new file mode 100644
index 00000000000000..2491f41abeb4e7
--- /dev/null
+++ b/llvm/test/MC/COFF/bad-parse.s
@@ -0,0 +1,13 @@
+// RUN: not llvm-mc -filetype=obj -triple i386-pc-win32 %s 2>&1 | FileCheck %s
+
+        .data
+
+// CHECK: [[@LINE+1]]:{{[0-9]+}}: error: expected identifier in directive
+        .secnum
+// CHECK: [[@LINE+1]]:{{[0-9]+}}: error: unexpected token in directive
+        .secnum section extra
+
+// CHECK: [[@LINE+1]]:{{[0-9]+}}: error: expected identifier in directive
+        .secoffset
+// CHECK: [[@LINE+1]]:{{[0-9]+}}: error: unexpected token in directive
+        .secoffset section extra



More information about the llvm-commits mailing list