[llvm] [MCLinker] MCLinker [nfr][wip] (PR #132989)

weiwei chen via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 25 13:21:52 PDT 2025


https://github.com/weiweichen created https://github.com/llvm/llvm-project/pull/132989

None

>From 9bc7b85383642995231afc4030457d25bc5f27b5 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Sat, 30 Nov 2024 21:18:37 -0500
Subject: [PATCH 01/17] Add files.

---
 llvm/include/llvm/Support/ModuleSplitter.h | 19 +++++++++++++++++++
 llvm/lib/Support/CMakeLists.txt            |  1 +
 llvm/lib/Support/ModuleSplitter.cpp        | 14 ++++++++++++++
 3 files changed, 34 insertions(+)
 create mode 100644 llvm/include/llvm/Support/ModuleSplitter.h
 create mode 100644 llvm/lib/Support/ModuleSplitter.cpp

diff --git a/llvm/include/llvm/Support/ModuleSplitter.h b/llvm/include/llvm/Support/ModuleSplitter.h
new file mode 100644
index 0000000000000..9f01bac925d88
--- /dev/null
+++ b/llvm/include/llvm/Support/ModuleSplitter.h
@@ -0,0 +1,19 @@
+//===- ModuleSplitter.h - Module Splitter Functions -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MODULESPLITTER_H
+#define LLVM_SUPPORT_MODULESPLITTER_H
+namespace llvm {
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 2ecaea4b02bf6..4694e3102dd03 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -214,6 +214,7 @@ add_llvm_component_library(LLVMSupport
   MemoryBuffer.cpp
   MemoryBufferRef.cpp
   ModRef.cpp
+  ModuleSplitter.cpp
   MD5.cpp
   MSP430Attributes.cpp
   MSP430AttributeParser.cpp
diff --git a/llvm/lib/Support/ModuleSplitter.cpp b/llvm/lib/Support/ModuleSplitter.cpp
new file mode 100644
index 0000000000000..ea3a37656bcbc
--- /dev/null
+++ b/llvm/lib/Support/ModuleSplitter.cpp
@@ -0,0 +1,14 @@
+//===--- ModuleSplitter.cpp - Module Splitter -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ModRef.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;

>From 220c5788fd11bb4d0f69e5aaa04d854132dec65e Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Sat, 30 Nov 2024 21:19:00 -0500
Subject: [PATCH 02/17] Add ModuleSplitter.h

---
 llvm/include/llvm/Support/ModuleSplitter.h | 59 ++++++++++++++++++++++
 llvm/lib/Support/ModuleSplitter.cpp        |  3 +-
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/ModuleSplitter.h b/llvm/include/llvm/Support/ModuleSplitter.h
index 9f01bac925d88..b09db1ee022b0 100644
--- a/llvm/include/llvm/Support/ModuleSplitter.h
+++ b/llvm/include/llvm/Support/ModuleSplitter.h
@@ -12,8 +12,67 @@
 
 #ifndef LLVM_SUPPORT_MODULESPLITTER_H
 #define LLVM_SUPPORT_MODULESPLITTER_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Error.h"
 namespace llvm {
 
+//===----------------------------------------------------------------------===//
+// LLVMModuleAndContext
+//===----------------------------------------------------------------------===//
+
+/// A pair of an LLVM module and the LLVM context that holds ownership of the
+/// objects. This is a useful class for parallelizing LLVM and managing
+/// ownership of LLVM instances.
+class LLVMModuleAndContext {
+public:
+  /// Expose the underlying LLVM context to create the module. This is the only
+  /// way to access the LLVM context to prevent accidental sharing.
+  Error create(
+      function_ref<ErrorOr<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
+          CreateModule);
+
+  llvm::Module &operator*() { return *Module; }
+  llvm::Module *operator->() { return Module.get(); }
+
+  void reset();
+
+private:
+  /// LLVM context stored in a unique pointer so that we can move this type.
+  std::unique_ptr<llvm::LLVMContext> CTX =
+      std::make_unique<llvm::LLVMContext>();
+  /// The paired LLVM module.
+  std::unique_ptr<llvm::Module> Module;
+};
+
+//===----------------------------------------------------------------------===//
+// Module Splitter
+//===----------------------------------------------------------------------===//
+
+using LLVMSplitProcessFn =
+    function_ref<void(llvm::unique_function<LLVMModuleAndContext()>,
+                      std::optional<int64_t>, unsigned)>;
+
+/// Helper to create a lambda that just forwards a preexisting Module.
+inline llvm::unique_function<LLVMModuleAndContext()>
+forwardModule(LLVMModuleAndContext &&Module) {
+  return [Module = std::move(Module)]() mutable { return std::move(Module); };
+}
+
+/// Support for splitting an LLVM module into multiple parts using anchored
+/// functions (e.g. exported functions), pulling all dependencies on the
+/// call stack into one module.
+void splitPerAnchored(LLVMModuleAndContext Module,
+                      LLVMSplitProcessFn ProcessFn,
+                      llvm::SmallVectorImpl<llvm::Function>& Anchors);
+
+/// Support for splitting an LLVM module into multiple parts where each part
+/// contains only one function.
+void splitPerFunction(
+    LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn);
+
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/Support/ModuleSplitter.cpp b/llvm/lib/Support/ModuleSplitter.cpp
index ea3a37656bcbc..be85707386b0d 100644
--- a/llvm/lib/Support/ModuleSplitter.cpp
+++ b/llvm/lib/Support/ModuleSplitter.cpp
@@ -8,7 +8,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/ModRef.h"
+#include "llvm/Support/ModuleSplitter.h"
 #include "llvm/ADT/STLExtras.h"
 
+
 using namespace llvm;

>From 30c4c1d9aead7900298953e86b2b901fe2eafcb6 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Sat, 30 Nov 2024 23:45:09 -0500
Subject: [PATCH 03/17] checkpoint.

---
 llvm/include/llvm/Support/ModuleSplitter.h |   6 +-
 llvm/lib/Support/ModuleSplitter.cpp        | 811 ++++++++++++++++++++-
 2 files changed, 813 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Support/ModuleSplitter.h b/llvm/include/llvm/Support/ModuleSplitter.h
index b09db1ee022b0..912d8edb7c189 100644
--- a/llvm/include/llvm/Support/ModuleSplitter.h
+++ b/llvm/include/llvm/Support/ModuleSplitter.h
@@ -30,8 +30,8 @@ class LLVMModuleAndContext {
 public:
   /// Expose the underlying LLVM context to create the module. This is the only
   /// way to access the LLVM context to prevent accidental sharing.
-  Error create(
-      function_ref<ErrorOr<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
+  Expected<bool> create(
+      function_ref<Expected<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
           CreateModule);
 
   llvm::Module &operator*() { return *Module; }
@@ -41,7 +41,7 @@ class LLVMModuleAndContext {
 
 private:
   /// LLVM context stored in a unique pointer so that we can move this type.
-  std::unique_ptr<llvm::LLVMContext> CTX =
+  std::unique_ptr<llvm::LLVMContext> Ctx =
       std::make_unique<llvm::LLVMContext>();
   /// The paired LLVM module.
   std::unique_ptr<llvm::Module> Module;
diff --git a/llvm/lib/Support/ModuleSplitter.cpp b/llvm/lib/Support/ModuleSplitter.cpp
index be85707386b0d..110062a6990b3 100644
--- a/llvm/lib/Support/ModuleSplitter.cpp
+++ b/llvm/lib/Support/ModuleSplitter.cpp
@@ -9,7 +9,816 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/ModuleSplitter.h"
-#include "llvm/ADT/STLExtras.h"
 
+#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 
 using namespace llvm;
+#define DEBUG_TYPE "llvm-module-split"
+
+//===----------------------------------------------------------------------===//
+// LLVMModuleAndContext
+//===----------------------------------------------------------------------===//
+
+Expected<bool> LLVMModuleAndContext::create(
+    function_ref<Expected<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
+        CreateModule) {
+  assert(!Module && "already have a module");
+  auto ModuleOr = CreateModule(*Ctx);
+  if (Error Err = ModuleOr.takeError())
+    return Err;
+
+  Module = std::move(*ModuleOr);
+  return true;
+}
+
+void LLVMModuleAndContext::reset() {
+  Module.reset();
+  Ctx.reset();
+}
+
+//===----------------------------------------------------------------------===//
+// StringConstantTable
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Large strings are very inefficiently encoded in LLVM bitcode (each `char` is
+/// encoded as a `uint64_t`). The LLVM bitcode reader also reads strings back
+/// very inefficiently, ultimately making 3 copies of the data. This is made
+/// worse by the fact that `getLazyBitcodeModule` does not lazily parse
+/// constants from the LLVM bitcode. Thus, when per-function splitting a module
+/// with N functions and M large string constants, we form 3*M*N copies of the
+/// large strings.
+///
+/// This class is part of a workaround of this inefficiency. When processing a
+/// module for splitting, we track any string global constants and their indices
+/// in this table. If a module is going to be roundtripped through bitcode to be
+/// lazily loaded, we externalize the strings by setting the corresponding
+/// constants to `zeroinitializer` in the module before it is written to
+/// bitcode. As we materialize constants on the other side, we check for a
+/// materialized global variable that matches an entry in the string table and
+/// directly copy the data over into the new LLVM context.
+///
+/// We can generalize this optimization to other large data types as necessary.
+///
+/// This class is used in an `RCRef` to be shared across multiple threads.
+class StringConstantTable
+    : public ThreadSafeRefCountedBase<StringConstantTable> {
+  /// An entry in the string table consists of a global variable, its module
+  /// index, and a reference to the string data. Because the string data is
+  /// owned by the original LLVM context, we have to ensure it stays alive.
+  struct Entry {
+    unsigned Idx;
+    const llvm::GlobalVariable *Var;
+    StringRef Value;
+  };
+
+public:
+  /// If `Value` denotes a string constant, record the data at index `GvIdx`.
+  void recordIfStringConstant(unsigned GvIdx, const llvm::GlobalValue &Value) {
+    auto Var = dyn_cast<llvm::GlobalVariable>(&Value);
+    if (Var && Var->isConstant() && Var->hasInternalLinkage()) {
+      auto *Init =
+          dyn_cast<llvm::ConstantDataSequential>(Var->getInitializer());
+      if (Init && Init->isCString())
+        StringConstants.push_back(Entry{GvIdx, Var, Init->getAsString()});
+    }
+  }
+
+  /// Before writing the main Module to bitcode, externalize large string
+  /// constants by stubbing out their values. Take ownership of the main Module
+  /// so the string data stays alive.
+  llvm::Module &externalizeStrings(LLVMModuleAndContext &&Module) {
+    MainModule = std::move(Module);
+    // Stub the initializers. The global variable is an internal constant, so it
+    // must have an initializer.
+    for (Entry &E : StringConstants) {
+      auto *Stub =
+          llvm::Constant::getNullValue(E.Var->getInitializer()->getType());
+      // `const_cast` is OK because we own the module now.
+      const_cast<llvm::GlobalVariable *>(E.Var)->setInitializer(Stub);
+    }
+    return *MainModule;
+  }
+
+  /// This is an iterator over the entries in the string table.
+  class Injector {
+    using const_iterator = std::vector<Entry>::const_iterator;
+
+  public:
+    /// Given a global variable in a materialized module and its index, if it is
+    /// a string constant found in the table, copy the data over into the new
+    /// LLVM context and set the initializer.
+    void materializeIfStringConstant(unsigned GvIdx,
+                                     llvm::GlobalVariable &Var) {
+      while (It != Et && It->Idx < GvIdx)
+        ++It;
+      if (It == Et || It->Idx != GvIdx)
+        return;
+      Var.setInitializer(llvm::ConstantDataArray::getString(
+          Var.getType()->getContext(), It->Value, /*AddNull=*/false));
+    }
+
+  private:
+    explicit Injector(const_iterator It, const_iterator Et) : It(It), Et(Et) {}
+
+    const_iterator It, Et;
+
+    friend class StringConstantTable;
+  };
+
+  Injector begin() const {
+    return Injector(StringConstants.begin(), StringConstants.end());
+  }
+
+private:
+  std::vector<Entry> StringConstants;
+  LLVMModuleAndContext MainModule;
+};
+
+//===----------------------------------------------------------------------===//
+// Module Splitter
+//===----------------------------------------------------------------------===//
+
+class LLVMModuleSplitterImpl {
+public:
+  explicit LLVMModuleSplitterImpl(LLVMModuleAndContext Module)
+      : MainModule(std::move(Module)) {}
+
+  /// Split the LLVM module into multiple modules using the provided process
+  /// function.
+  void split(LLVMSplitProcessFn ProcessFn,
+             llvm::SmallVectorImpl<llvm::Function> &Anchors);
+
+private:
+  struct ValueInfo {
+    /// The immediate global value dependencies of a value.
+    SmallVector<const llvm::GlobalValue *> Dependencies;
+    /// Map each global value to its index in the module. We will use this to
+    /// materialize global values from bitcode.
+    unsigned GvIdx;
+  };
+
+  struct TransitiveDeps {
+    /// The transitive dependencies.
+    llvm::MapVector<const llvm::GlobalValue *, unsigned> Deps;
+    /// True if computation is complete.
+    bool Complete = false;
+    /// The assigned module index.
+    std::optional<unsigned> MutIdx;
+  };
+
+  /// Collect the immediate global value dependencies of `Value`. `Orig` is the
+  /// original transitive value, which is not equal to `Value` when it is used
+  /// in a constant.
+  void collectImmediateDependencies(const llvm::Value *Value,
+                                    const llvm::GlobalValue *Orig);
+
+  /// The main LLVM module being split.
+  LLVMModuleAndContext MainModule;
+
+  /// The value info for each global value in the module.
+  llvm::DenseMap<const llvm::Value *, ValueInfo> Infos;
+
+  /// The transitive dependencies of each global value.
+  llvm::MapVector<const llvm::GlobalValue *, TransitiveDeps> TransDeps;
+
+  /// Users of split "anchors". These are global values where we don't want
+  /// their users to be split into different modules because it will cause the
+  /// symbol to be duplicated.
+  llvm::MapVector<const llvm::GlobalValue *, llvm::SetVector<TransitiveDeps *>>
+      SplitAnchorUsers;
+};
+} // namespace
+
+static LLVMModuleAndContext readAndMaterializeDependencies(
+    MemoryBuffer &Buf,
+    const llvm::MapVector<const llvm::GlobalValue *, unsigned> &Set,
+    const StringConstantTable &Strtab) {
+
+  // First, create a lazy module with an internal bitcode materializer.
+  // TODO: Not sure how to make lazy loading metadata work.
+  LLVMModuleAndContext Result;
+  {
+    (void)Result.create(
+        [&](llvm::LLVMContext &Ctx) -> Expected<std::unique_ptr<Module>> {
+          return llvm::cantFail(llvm::getLazyBitcodeModule(
+              llvm::MemoryBufferRef(Buf.getBuffer(), "<split-module>"), Ctx,
+              /*ShouldLazyLoadMetadata=*/false));
+        });
+    Result->setModuleInlineAsm("");
+  }
+
+  SmallVector<unsigned> SortIndices =
+      llvm::to_vector(llvm::make_second_range(Set));
+  llvm::sort(SortIndices, std::less<unsigned>());
+  auto IdxIt = SortIndices.begin();
+  auto IdxEnd = SortIndices.end();
+
+  // The global value indices go from globals, functions, then aliases. This
+  // mirrors the order in which global values are deleted by LLVM's GlobalDCE.
+  unsigned CurIdx = 0;
+  StringConstantTable::Injector It = Strtab.begin();
+  // We need to keep the IR "valid" for the verifier because `materializeAll`
+  // may invoke it. It doesn't matter since we're deleting the globals anyway.
+  for (llvm::GlobalVariable &Global : Result->globals()) {
+    if (IdxIt != IdxEnd && CurIdx == *IdxIt) {
+      ++IdxIt;
+      llvm::cantFail(Global.materialize());
+      It.materializeIfStringConstant(CurIdx, Global);
+    } else {
+      Global.setInitializer(nullptr);
+      Global.setComdat(nullptr);
+      Global.setLinkage(llvm::GlobalValue::ExternalLinkage);
+      // External link should not be DSOLocal anymore,
+      // otherwise position independent code generates
+      // `R_X86_64_PC32` instead of `R_X86_64_REX_GOTPCRELX`
+      // for these symbols and building shared library from
+      // a static archive of this module will error with an `fPIC` confusion.
+      Global.setDSOLocal(false);
+    }
+    ++CurIdx;
+  }
+  for (llvm::Function &Func : Result->functions()) {
+    if (IdxIt != IdxEnd && CurIdx == *IdxIt) {
+      ++IdxIt;
+      llvm::cantFail(Func.materialize());
+    } else {
+      Func.deleteBody();
+      Func.setComdat(nullptr);
+      Func.setLinkage(llvm::GlobalValue::ExternalLinkage);
+      // External link should not be DSOLocal anymore,
+      // otherwise position independent code generates
+      // `R_X86_64_PC32` instead of `R_X86_64_REX_GOTPCRELX`
+      // for these symbols and building shared library from
+      // a static archive of this module will error with an `fPIC` confusion.
+      // External link should not be DSOLocal anymore,
+      // otherwise position independent code generation get confused.
+      Func.setDSOLocal(false);
+    }
+    ++CurIdx;
+  }
+
+  // Finalize materialization of the module.
+  llvm::cantFail(Result->materializeAll());
+
+  // Now that the module is materialized, we can start deleting stuff. Just
+  // delete declarations with no uses.
+  for (llvm::GlobalVariable &Global :
+       llvm::make_early_inc_range(Result->globals())) {
+    if (Global.isDeclaration() && Global.use_empty())
+      Global.eraseFromParent();
+  }
+  for (llvm::Function &Func : llvm::make_early_inc_range(Result->functions())) {
+    if (Func.isDeclaration() && Func.use_empty())
+      Func.eraseFromParent();
+  }
+  return Result;
+}
+
+/// Support for splitting an LLVM module into multiple parts using exported
+/// functions as anchors, pulling all dependencies on the call stack into one
+/// module.
+void splitPerAnchored(LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
+                      llvm::SmallVectorImpl<llvm::Function> &Anchors) {
+  LLVMModuleSplitterImpl impl(std::move(Module));
+  impl.split(ProcessFn, Anchors);
+}
+
+void LLVMModuleSplitterImpl::split(
+    LLVMSplitProcessFn processFn,
+    llvm::SmallVectorImpl<llvm::Function> &Anchors) {
+  // The use-def list is sparse. Use it to build a sparse dependency graph
+  // between global values.
+  auto strtab = RCRef<StringConstantTable>::create();
+  unsigned gvIdx = 0;
+  auto computeDeps = [&](const llvm::GlobalValue &value) {
+    strtab->recordIfStringConstant(gvIdx, value);
+    infos[&value].gvIdx = gvIdx++;
+    collectImmediateDependencies(&value, &value);
+  };
+  // NOTE: The visitation of globals then functions has to line up with
+  // `readAndMaterializeDependencies`.
+  for (const llvm::GlobalVariable &global : mainModule->globals()) {
+    computeDeps(global);
+    if (!global.hasInternalLinkage() && !global.hasPrivateLinkage())
+      transitiveDeps[&global];
+  }
+  for (const llvm::Function &fn : mainModule->functions()) {
+    computeDeps(fn);
+    if (!fn.isDeclaration() && (fn.hasExternalLinkage() || fn.hasWeakLinkage()))
+      transitiveDeps[&fn];
+  }
+
+  // If there is only one (or fewer) exported functions, forward the main
+  // module.
+  if (transitiveDeps.size() <= 1)
+    return processFn(forwardModule(std::move(mainModule)), std::nullopt,
+                     /*numFunctionBase=*/0);
+
+  // Now for each export'd global value, compute the transitive set of
+  // dependencies using DFS.
+  SmallVector<const llvm::GlobalValue *> worklist;
+  for (auto &[value, deps] : transitiveDeps) {
+    worklist.clear();
+    worklist.push_back(value);
+    while (!worklist.empty()) {
+      const llvm::GlobalValue *it = worklist.pop_back_val();
+
+      auto [iter, inserted] = deps.deps.insert({it, -1});
+      if (!inserted) {
+        // Already visited.
+        continue;
+      }
+      // Pay the cost of the name lookup only on a miss.
+      const ValueInfo &info = infos.at(it);
+      iter->second = info.gvIdx;
+
+      // If this value depends on another value that is going to be split, we
+      // don't want to duplicate the symbol. Keep all the users together.
+      if (it != value) {
+        if (auto depIt = transitiveDeps.find(it);
+            depIt != transitiveDeps.end()) {
+          auto &users = splitAnchorUsers[it];
+          users.insert(&deps);
+          // Make sure to include the other value in its own user list.
+          users.insert(&depIt->second);
+          // We don't have to recurse since the subgraph will get processed.
+          continue;
+        }
+      }
+
+      // If this value depends on a mutable global, keep track of it. We have to
+      // put all users of a mutable global in the same module.
+      if (auto *global = dyn_cast<llvm::GlobalVariable>(it);
+          global && !global->isConstant())
+        splitAnchorUsers[global].insert(&deps);
+
+      // Recurse on dependencies.
+      llvm::append_range(worklist, info.dependencies);
+    }
+
+    deps.complete = true;
+  }
+
+  // For each mutable global, grab all the transitive users and put them in one
+  // module. If global A has user set A* and global B has user set B* where
+  // A* and B* have an empty intersection, all values in A* will be assigned 0
+  // and all values in B* will be assigned 1. If global C has user set C* that
+  // overlaps both A* and B*, it will overwrite both to 2.
+  SmallVector<SmallVector<TransitiveDeps *>> bucketing(splitAnchorUsers.size());
+  for (auto [curMutIdx, bucket, users] :
+       llvm::enumerate(bucketing, llvm::make_second_range(splitAnchorUsers))) {
+    for (TransitiveDeps *deps : users) {
+      if (deps->mutIdx && *deps->mutIdx != curMutIdx) {
+        auto &otherBucket = bucketing[*deps->mutIdx];
+        for (TransitiveDeps *other : otherBucket) {
+          bucket.push_back(other);
+          other->mutIdx = curMutIdx;
+        }
+        otherBucket.clear();
+        assert(*deps->mutIdx == curMutIdx);
+      } else {
+        bucket.push_back(deps);
+        deps->mutIdx = curMutIdx;
+      }
+    }
+  }
+
+  // Now that we have assigned buckets to each value, merge the transitive
+  // dependency sets of all values belonging to the same set.
+  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>> buckets(
+      bucketing.size());
+  for (auto [deps, bucket] : llvm::zip(bucketing, buckets)) {
+    for (TransitiveDeps *dep : deps) {
+      for (auto &namedValue : dep->deps)
+        bucket.insert(namedValue);
+    }
+  }
+
+  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned> *>
+      setsToProcess;
+  setsToProcess.reserve(buckets.size() + transitiveDeps.size());
+
+  // Clone each mutable global bucket into its own module.
+  for (auto &bucket : buckets) {
+    if (bucket.empty())
+      continue;
+    setsToProcess.push_back(&bucket);
+  }
+
+  for (auto &[root, deps] : transitiveDeps) {
+    // Skip values included in another transitive dependency set and values
+    // included in mutable global sets.
+    if (!deps.mutIdx)
+      setsToProcess.push_back(&deps.deps);
+  }
+
+  if (setsToProcess.size() <= 1)
+    return processFn(forwardModule(std::move(mainModule)), std::nullopt,
+                     /*numFunctionBase=*/0);
+
+  // Sort the sets by size to schedule the larger modules first.
+  llvm::sort(setsToProcess,
+             [](auto *lhs, auto *rhs) { return lhs->size() > rhs->size(); });
+
+  // Prepare to materialize slices of the module by first writing the main
+  // module as bitcode to a shared buffer.
+  auto buf = WriteableBuffer::get();
+  {
+    CompilerTimeTraceScope traceScope("writeMainModuleBitcode");
+    llvm::Module &module = strtab->externalizeStrings(std::move(mainModule));
+    llvm::WriteBitcodeToFile(module, *buf);
+  }
+
+  unsigned numFunctions = 0;
+  for (auto [idx, set] : llvm::enumerate(setsToProcess)) {
+    unsigned next = numFunctions + set->size();
+    auto makeModule = [set = std::move(*set), buf = BufferRef(buf.copy()),
+                       strtab = strtab.copy()]() mutable {
+      return readAndMaterializeDependencies(std::move(buf), set, *strtab,
+                                            /*ignoreFns=*/{});
+    };
+    processFn(std::move(makeModule), idx, numFunctions);
+    numFunctions = next;
+  }
+}
+
+void LLVMModuleSplitterImpl::collectImmediateDependencies(
+    const llvm::Value *value, const llvm::GlobalValue *orig) {
+  for (const llvm::Value *user : value->users()) {
+    // Recurse into pure constant users.
+    if (isa<llvm::Constant>(user) && !isa<llvm::GlobalValue>(user)) {
+      collectImmediateDependencies(user, orig);
+      continue;
+    }
+
+    if (auto *inst = dyn_cast<llvm::Instruction>(user)) {
+      const llvm::Function *func = inst->getParent()->getParent();
+      infos[func].dependencies.push_back(orig);
+    } else if (auto *globalVal = dyn_cast<llvm::GlobalValue>(user)) {
+      infos[globalVal].dependencies.push_back(orig);
+    } else {
+      llvm_unreachable("unexpected user of global value");
+    }
+  }
+}
+
+namespace {
+/// This class provides support for splitting an LLVM module into multiple
+/// parts.
+/// TODO: Clean up the splitters here (some code duplication) when we can move
+/// to per function llvm compilation.
+class LLVMModulePerFunctionSplitterImpl {
+public:
+  LLVMModulePerFunctionSplitterImpl(LLVMModuleAndContext module)
+      : mainModule(std::move(module)) {}
+
+  /// Split the LLVM module into multiple modules using the provided process
+  /// function.
+  void
+  split(LLVMSplitProcessFn processFn,
+        llvm::StringMap<llvm::GlobalValue::LinkageTypes> &symbolLinkageTypes,
+        unsigned numFunctionBase);
+
+private:
+  struct ValueInfo {
+    const llvm::Value *value = nullptr;
+    bool canBeSplit = true;
+    llvm::SmallPtrSet<const llvm::GlobalValue *, 4> dependencies;
+    llvm::SmallPtrSet<const llvm::GlobalValue *, 4> users;
+    /// Map each global value to its index in the module. We will use this to
+    /// materialize global values from bitcode.
+    unsigned gvIdx;
+    bool userEmpty = true;
+  };
+
+  /// Collect all of the immediate global value users of `value`.
+  void collectValueUsers(const llvm::GlobalValue *value);
+
+  /// Propagate use information through the module.
+  void propagateUseInfo();
+
+  /// The main LLVM module being split.
+  LLVMModuleAndContext mainModule;
+
+  /// The value info for each global value in the module.
+  llvm::MapVector<const llvm::GlobalValue *, ValueInfo> valueInfos;
+};
+} // namespace
+
+static void
+checkDuplicates(llvm::MapVector<const llvm::GlobalValue *, unsigned> &set,
+                llvm::StringSet<> &seenFns, llvm::StringSet<> &dupFns) {
+  for (auto [gv, _] : set) {
+    if (auto fn = dyn_cast<llvm::Function>(gv)) {
+      if (!seenFns.insert(fn->getName()).second) {
+        dupFns.insert(fn->getName());
+      }
+    }
+  }
+}
+
+/// Support for splitting an LLVM module into multiple parts where each part
+/// contains only one function (except for coroutine-related functions).
+void KGEN::splitPerFunction(
+    LLVMModuleAndContext module, LLVMSplitProcessFn processFn,
+    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &symbolLinkageTypes,
+    unsigned numFunctionBase) {
+  CompilerTimeTraceScope traceScope("splitPerFunction");
+  LLVMModulePerFunctionSplitterImpl impl(std::move(module));
+  impl.split(processFn, symbolLinkageTypes, numFunctionBase);
+}
+
+/// Split the LLVM module into multiple modules using the provided process
+/// function.
+void LLVMModulePerFunctionSplitterImpl::split(
+    LLVMSplitProcessFn processFn,
+    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &symbolLinkageTypes,
+    unsigned numFunctionBase) {
+  // Compute the value info for each global in the module.
+  // NOTE: The visitation of globals then functions has to line up with
+  // `readAndMaterializeDependencies`.
+  auto strtab = RCRef<StringConstantTable>::create();
+  unsigned gvIdx = 0;
+  auto computeUsers = [&](const llvm::GlobalValue &value) {
+    strtab->recordIfStringConstant(gvIdx, value);
+    valueInfos[&value].gvIdx = gvIdx++;
+    collectValueUsers(&value);
+  };
+  llvm::for_each(mainModule->globals(), computeUsers);
+  llvm::for_each(mainModule->functions(), computeUsers);
+
+  // With use information collected, propagate it to the dependencies.
+  propagateUseInfo();
+
+  // Now we can split the module.
+  // We split the module per function, cloning any necessary dependencies:
+  // - For function dependencies, only clone the declaration unless it's
+  //   coroutine related.
+  // - For other internal values, clone as is.
+  // This is much finer-grained splitting, which enables significantly higher
+  // levels of parallelism (and smaller generated artifacts).
+  // LLVM LTO style optimization may suffer a bit here since we don't have
+  // the full callstack present anymore in each cloned module.
+  llvm::DenseSet<const llvm::Value *> splitValues;
+  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>>
+      setsToProcess;
+
+  // Hoist these collections to re-use memory allocations.
+  llvm::ValueToValueMapTy valueMap;
+  SmallPtrSet<const llvm::Value *, 4> splitDeps;
+  auto splitValue = [&](const llvm::GlobalValue *root) {
+    // If the function is already split, e.g. if it was a dependency of
+    // another function, skip it.
+    if (splitValues.count(root))
+      return;
+
+    auto &valueInfo = valueInfos[root];
+    valueMap.clear();
+    splitDeps.clear();
+    auto shouldSplit = [&](const llvm::GlobalValue *globalVal,
+                           const ValueInfo &info) {
+      // Only clone root and the declaration of its dependencies.
+      if (globalVal == root) {
+        splitDeps.insert(globalVal);
+        return true;
+      }
+
+      if ((info.canBeSplit || info.userEmpty) &&
+          isa_and_nonnull<llvm::Function>(globalVal))
+        return false;
+
+      if (valueInfo.dependencies.contains(globalVal)) {
+        splitDeps.insert(globalVal);
+        return true;
+      }
+
+      return false;
+    };
+
+    auto &set = setsToProcess.emplace_back();
+    for (auto &[globalVal, info] : valueInfos) {
+      if (shouldSplit(globalVal, info))
+        set.insert({globalVal, info.gvIdx});
+    }
+    if (set.empty())
+      setsToProcess.pop_back();
+
+    // Record the split values.
+    splitValues.insert(splitDeps.begin(), splitDeps.end());
+  };
+
+  [[maybe_unused]] int64_t count = 0;
+  SmallVector<const llvm::GlobalValue *> toSplit;
+  unsigned unnamedGlobal = numFunctionBase;
+  for (auto &global : mainModule->globals()) {
+    if (global.hasInternalLinkage() || global.hasPrivateLinkage()) {
+      if (!global.hasName()) {
+        // Give each unnamed GlobalVariable a unique name so that MCLink does
+        // not get confused when naming them while generating linked code,
+        // since the IR values can differ between splits (for the X86 backend).
+        // ASan builds insert these unnamed GlobalVariables.
+        global.setName("__mojo_unnamed" + Twine(unnamedGlobal++));
+      }
+
+      symbolLinkageTypes.insert({global.getName().str(), global.getLinkage()});
+      global.setLinkage(llvm::GlobalValue::WeakAnyLinkage);
+      continue;
+    }
+
+    if (global.hasExternalLinkage())
+      continue;
+
+    // TODO: Add special handling for `llvm.global_ctors` and
+    // `llvm.global_dtors`, because otherwise they end up tying almost all
+    // symbols into the same split.
+    LLVM_DEBUG(llvm::dbgs()
+                   << (count++) << ": split global: " << global << "\n";);
+    toSplit.emplace_back(&global);
+  }
+
+  for (auto &fn : mainModule->functions()) {
+    if (fn.isDeclaration())
+      continue;
+
+    ValueInfo &info = valueInfos[&fn];
+    if (fn.hasInternalLinkage() || fn.hasPrivateLinkage()) {
+      // Avoid renaming when linking in MCLink.
+      symbolLinkageTypes.insert({fn.getName().str(), fn.getLinkage()});
+      fn.setLinkage(llvm::Function::LinkageTypes::WeakAnyLinkage);
+    }
+
+    if (info.canBeSplit || info.userEmpty) {
+      LLVM_DEBUG(llvm::dbgs()
+                     << (count++) << ": split fn: " << fn.getName() << "\n";);
+      toSplit.emplace_back(&fn);
+    }
+  }
+
+  // Run this now since we just changed the linkages.
+  for (const llvm::GlobalValue *value : toSplit)
+    splitValue(value);
+
+  if (setsToProcess.size() <= 1)
+    return processFn(forwardModule(std::move(mainModule)), std::nullopt,
+                     numFunctionBase);
+
+  auto duplicatedFns = std::move(mainModule.duplicatedFns);
+
+  // Prepare to materialize slices of the module by first writing the main
+  // module as bitcode to a shared buffer.
+  auto buf = WriteableBuffer::get();
+  {
+    CompilerTimeTraceScope traceScope("writeMainModuleBitcode");
+    llvm::Module &module = strtab->externalizeStrings(std::move(mainModule));
+    llvm::WriteBitcodeToFile(module, *buf);
+  }
+
+  unsigned numFunctions = numFunctionBase;
+  llvm::StringSet<> seenFns;
+  for (auto [idx, set] : llvm::enumerate(setsToProcess)) {
+    // Give each function a unique ID across all splits for proper MC-level
+    // linking and codegen into one object file, where functions duplicated
+    // across splits will be deduplicated (by the linking).
+    llvm::StringSet<> currDuplicatedFns = duplicatedFns;
+    checkDuplicates(set, seenFns, currDuplicatedFns);
+
+    unsigned next = numFunctions + set.size();
+    auto makeModule = [set = std::move(set), buf = BufferRef(buf.copy()),
+                       strtab = strtab.copy(), currDuplicatedFns]() mutable {
+      return readAndMaterializeDependencies(std::move(buf), set, *strtab,
+                                            currDuplicatedFns);
+    };
+    processFn(std::move(makeModule), idx, numFunctions);
+    numFunctions = next;
+  }
+}
+
+/// Record in `valueInfos` each function or global value that uses `value`.
+void LLVMModulePerFunctionSplitterImpl::collectValueUsers(
+    const llvm::GlobalValue *value) {
+  SmallVector<const llvm::User *> worklist(value->users());
+
+  while (!worklist.empty()) {
+    const llvm::User *userIt = worklist.pop_back_val();
+
+    // Look through constants that are not global values: queue their users.
+    if (isa<llvm::Constant>(userIt) && !isa<llvm::GlobalValue>(userIt)) {
+      worklist.append(userIt->user_begin(), userIt->user_end());
+      continue;
+    }
+    // An instruction user is attributed to the function that contains it.
+    if (const auto *inst = dyn_cast<llvm::Instruction>(userIt)) {
+      const llvm::Function *func = inst->getParent()->getParent();
+      valueInfos[value].users.insert(func);
+      valueInfos[func]; // ensure the user has its own entry
+    } else if (const auto *globalVal = dyn_cast<llvm::GlobalValue>(userIt)) {
+      valueInfos[value].users.insert(globalVal);
+      valueInfos[globalVal]; // ensure the user has its own entry
+    } else {
+      llvm_unreachable("unexpected user of global value");
+    }
+  }
+
+  // For a global variable, `canBeSplit` is set from its constness: a mutable
+  // global variable cannot be split.
+  if (auto *global = dyn_cast<llvm::GlobalVariable>(value))
+    valueInfos[value].canBeSplit = global->isConstant();
+}
+
+/// Propagate dependency sets and `canBeSplit` along user edges to a fixpoint.
+void LLVMModulePerFunctionSplitterImpl::propagateUseInfo() {
+  std::vector<ValueInfo *> worklist;
+  // Seed the iteration: each value other than a function declaration
+  // depends on itself and is queued for propagation.
+  for (auto &[value, info] : valueInfos) {
+    if (auto func = llvm::dyn_cast<llvm::Function>(value)) {
+      if (func->isDeclaration())
+        continue;
+    }
+
+    info.dependencies.insert(value);
+    info.value = value;
+    worklist.push_back(&info);
+    if (!info.canBeSplit) {
+      // If a value cannot be split, its users are also its dependencies.
+      llvm::set_union(info.dependencies, info.users);
+    }
+  }
+  // Drain the worklist until no dependency set or flag changes any more.
+  while (!worklist.empty()) {
+    ValueInfo *info = worklist.back();
+    worklist.pop_back();
+
+    // Push this value's dependencies to its users (entries assumed to exist).
+    for (const llvm::GlobalValue *user : info->users) {
+      ValueInfo &userInfo = valueInfos.find(user)->second;
+      if (info == &userInfo)
+        continue; // self-use: nothing to propagate
+      bool changed = false;
+
+      // Merge this value's dependencies into the user unless this value is
+      // a function that can be split into a separate module.
+      bool mergeToUserDep = true;
+      if (llvm::isa_and_nonnull<llvm::Function>(info->value)) {
+        mergeToUserDep = !info->canBeSplit;
+      }
+
+      // A true result from set_union is treated as "the user's set changed".
+      if (mergeToUserDep) {
+        if (llvm::set_union(userInfo.dependencies, info->dependencies))
+          changed = true;
+      }
+
+      // If the value cannot be split, its users cannot be split either.
+      if (!info->canBeSplit && userInfo.canBeSplit) {
+        userInfo.canBeSplit = false;
+        changed = true;
+        // If a value cannot be split, its users are also its dependencies.
+        llvm::set_union(userInfo.dependencies, userInfo.users);
+      }
+
+      if (changed) { // revisit the user so the change keeps propagating
+        userInfo.value = user;
+        worklist.push_back(&userInfo);
+      }
+    }
+    // NOTE(review): this isa<GlobalValue> test looks always true -- confirm.
+    if (info->canBeSplit || isa_and_nonnull<llvm::GlobalValue>(info->value))
+      continue;
+
+    // Otherwise merge this value's dependencies into each of its own
+    // dependencies (appears unreachable if the test above is always true).
+    for (const llvm::GlobalValue *dep : info->dependencies) {
+      ValueInfo &depInfo = valueInfos.find(dep)->second;
+      if (info == &depInfo)
+        continue;
+      if (llvm::set_union(depInfo.dependencies, info->dependencies)) {
+        depInfo.value = dep;
+        worklist.push_back(&depInfo);
+      }
+    }
+  }
+  // `userEmpty` means: no users at all, or the only user is the value itself.
+  for (auto &[value, info] : valueInfos) {
+    info.userEmpty = info.users.empty() ||
+                     (info.users.size() == 1 && info.users.contains(value));
+  }
+}

>From b84346d9e162653fa8c268ff6327e6980046e878 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Sun, 5 Jan 2025 23:16:56 -0500
Subject: [PATCH 04/17] checkpoint.

---
 llvm/lib/Support/ModuleSplitter.cpp | 155 ++++++++++++++--------------
 1 file changed, 78 insertions(+), 77 deletions(-)

diff --git a/llvm/lib/Support/ModuleSplitter.cpp b/llvm/lib/Support/ModuleSplitter.cpp
index 110062a6990b3..1fc45f415e3ac 100644
--- a/llvm/lib/Support/ModuleSplitter.cpp
+++ b/llvm/lib/Support/ModuleSplitter.cpp
@@ -24,6 +24,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
 #include "llvm/Transforms/Utils/SplitModule.h"
@@ -208,7 +209,7 @@ class LLVMModuleSplitterImpl {
 } // namespace
 
 static LLVMModuleAndContext readAndMaterializeDependencies(
-    MemoryBuffer &Buf,
+    WritableMemoryBuffer &Buf,
     const llvm::MapVector<const llvm::GlobalValue *, unsigned> &Set,
     const StringConstantTable &Strtab) {
 
@@ -219,7 +220,7 @@ static LLVMModuleAndContext readAndMaterializeDependencies(
     (void)Result.create(
         [&](llvm::LLVMContext &Ctx) -> Expected<std::unique_ptr<Module>> {
           return llvm::cantFail(llvm::getLazyBitcodeModule(
-              llvm::MemoryBufferRef(Buf.getBuffer(), "<split-module>"), Ctx,
+              llvm::MemoryBufferRef(Buf.MemoryBuffer::getBuffer(), "<split-module>"), Ctx,
               /*ShouldLazyLoadMetadata=*/false));
         });
     Result->setModuleInlineAsm("");
@@ -228,8 +229,8 @@ static LLVMModuleAndContext readAndMaterializeDependencies(
   SmallVector<unsigned> SortIndices =
       llvm::to_vector(llvm::make_second_range(Set));
   llvm::sort(SortIndices, std::less<unsigned>());
-  auto IdxIt = SortIndices.begin();
-  auto IdxEnd = SortIndices.end();
+  auto* IdxIt = SortIndices.begin();
+  auto* IdxEnd = SortIndices.end();
 
   // The global value indices go from globals, functions, then aliases. This
   // mirrors the order in which global values are deleted by LLVM's GlobalDCE.
@@ -297,68 +298,69 @@ static LLVMModuleAndContext readAndMaterializeDependencies(
 /// module.
 void splitPerAnchored(LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
                       llvm::SmallVectorImpl<llvm::Function> &Anchors) {
-  LLVMModuleSplitterImpl impl(std::move(Module));
-  impl.split(ProcessFn, Anchors);
+  LLVMModuleSplitterImpl Impl(std::move(Module));
+  Impl.split(ProcessFn, Anchors);
 }
 
 void LLVMModuleSplitterImpl::split(
-    LLVMSplitProcessFn processFn,
+    LLVMSplitProcessFn ProcessFn,
     llvm::SmallVectorImpl<llvm::Function> &Anchors) {
   // The use-def list is sparse. Use it to build a sparse dependency graph
   // between global values.
-  auto strtab = RCRef<StringConstantTable>::create();
-  unsigned gvIdx = 0;
-  auto computeDeps = [&](const llvm::GlobalValue &value) {
-    strtab->recordIfStringConstant(gvIdx, value);
-    infos[&value].gvIdx = gvIdx++;
+  IntrusiveRefCntPtr<StringConstantTable> Strtab(new StringConstantTable());
+  unsigned GvIdx = 0;
+
+  auto ComputeDeps = [&](const llvm::GlobalValue &value) {
+    Strtab->recordIfStringConstant(GvIdx, value);
+    Infos[&value].GvIdx = GvIdx++;
     collectImmediateDependencies(&value, &value);
   };
   // NOTE: The visitation of globals then functions has to line up with
   // `readAndMaterializeDependencies`.
-  for (const llvm::GlobalVariable &global : mainModule->globals()) {
-    computeDeps(global);
+  for (const llvm::GlobalVariable &global : MainModule->globals()) {
+    ComputeDeps(global);
     if (!global.hasInternalLinkage() && !global.hasPrivateLinkage())
-      transitiveDeps[&global];
+      TransDeps[&global];
   }
-  for (const llvm::Function &fn : mainModule->functions()) {
-    computeDeps(fn);
-    if (!fn.isDeclaration() && (fn.hasExternalLinkage() || fn.hasWeakLinkage()))
-      transitiveDeps[&fn];
+  for (const llvm::Function &Fn : MainModule->functions()) {
+    ComputeDeps(Fn);
+    if (!Fn.isDeclaration() && (Fn.hasExternalLinkage() || Fn.hasWeakLinkage()))
+      TransDeps[&Fn];
   }
 
   // If there is only one (or fewer) exported functions, forward the main
   // module.
-  if (transitiveDeps.size() <= 1)
-    return processFn(forwardModule(std::move(mainModule)), std::nullopt,
+  if (TransDeps.size() <= 1)
+    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
                      /*numFunctionBase=*/0);
 
   // Now for each export'd global value, compute the transitive set of
   // dependencies using DFS.
-  SmallVector<const llvm::GlobalValue *> worklist;
-  for (auto &[value, deps] : transitiveDeps) {
-    worklist.clear();
-    worklist.push_back(value);
-    while (!worklist.empty()) {
-      const llvm::GlobalValue *it = worklist.pop_back_val();
-
-      auto [iter, inserted] = deps.deps.insert({it, -1});
+  SmallVector<const llvm::GlobalValue *> Worklist;
+  for (auto &[Value, Deps] : TransDeps) {
+    Worklist.clear();
+    Worklist.push_back(Value);
+    while (!Worklist.empty()) {
+      const llvm::GlobalValue *It = Worklist.pop_back_val();
+
+      auto [iter, inserted] = Deps.Deps.insert({It, -1});
       if (!inserted) {
         // Already visited.
         continue;
       }
       // Pay the cost of the name lookup only on a miss.
-      const ValueInfo &info = infos.at(it);
-      iter->second = info.gvIdx;
+      const ValueInfo &Info = Infos.at(It);
+      iter->second = Info.GvIdx;
 
       // If this value depends on another value that is going to be split, we
       // don't want to duplicate the symbol. Keep all the users together.
-      if (it != value) {
-        if (auto depIt = transitiveDeps.find(it);
-            depIt != transitiveDeps.end()) {
-          auto &users = splitAnchorUsers[it];
-          users.insert(&deps);
+      if (It != Value) {
+        if (auto* DepIt = TransDeps.find(It);
+            DepIt != TransDeps.end()) {
+          auto &Users = SplitAnchorUsers[It];
+          Users.insert(&Deps);
           // Make sure to include the other value in its own user list.
-          users.insert(&depIt->second);
+          Users.insert(&DepIt->second);
           // We don't have to recurse since the subgraph will get processed.
           continue;
         }
@@ -366,15 +368,15 @@ void LLVMModuleSplitterImpl::split(
 
       // If this value depends on a mutable global, keep track of it. We have to
       // put all users of a mutable global in the same module.
-      if (auto *global = dyn_cast<llvm::GlobalVariable>(it);
-          global && !global->isConstant())
-        splitAnchorUsers[global].insert(&deps);
+      if (auto *Global = dyn_cast<llvm::GlobalVariable>(It);
+          Global && !Global->isConstant())
+        SplitAnchorUsers[Global].insert(&Deps);
 
       // Recursive on dependencies.
-      llvm::append_range(worklist, info.dependencies);
+      llvm::append_range(Worklist, Info.Dependencies);
     }
 
-    deps.complete = true;
+    Deps.Complete = true;
   }
 
   // For each mutable global, grab all the transitive users and put them in one
@@ -382,69 +384,68 @@ void LLVMModuleSplitterImpl::split(
   // A* and B* have an empty intersection, all values in A* will be assigned 0
   // and all values in B* will be assigned 1. If global C has user set C* that
   // overlaps both A* and B*, it will overwrite both to 2.
-  SmallVector<SmallVector<TransitiveDeps *>> bucketing(splitAnchorUsers.size());
-  for (auto [curMutIdx, bucket, users] :
-       llvm::enumerate(bucketing, llvm::make_second_range(splitAnchorUsers))) {
-    for (TransitiveDeps *deps : users) {
-      if (deps->mutIdx && *deps->mutIdx != curMutIdx) {
-        auto &otherBucket = bucketing[*deps->mutIdx];
-        for (TransitiveDeps *other : otherBucket) {
-          bucket.push_back(other);
-          other->mutIdx = curMutIdx;
+  SmallVector<SmallVector<TransitiveDeps *>> Bucketing(SplitAnchorUsers.size());
+  for (auto [CurMutIdx, Bucket, Users] :
+       llvm::enumerate(Bucketing, llvm::make_second_range(SplitAnchorUsers))) {
+    for (TransitiveDeps *Deps : Users) {
+      if (Deps->MutIdx && *Deps->MutIdx != CurMutIdx) {
+        auto &OtherBucket = Bucketing[*Deps->MutIdx];
+        for (TransitiveDeps *Other : OtherBucket) {
+          Bucket.push_back(Other);
+          Other->MutIdx = CurMutIdx;
         }
-        otherBucket.clear();
-        assert(*deps->mutIdx == curMutIdx);
+        OtherBucket.clear();
+        assert(*Deps->MutIdx == CurMutIdx);
       } else {
-        bucket.push_back(deps);
-        deps->mutIdx = curMutIdx;
+        Bucket.push_back(Deps);
+        Deps->MutIdx = CurMutIdx;
       }
     }
   }
 
   // Now that we have assigned buckets to each value, merge the transitive
   // dependency sets of all values belonging to the same set.
-  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>> buckets(
-      bucketing.size());
-  for (auto [deps, bucket] : llvm::zip(bucketing, buckets)) {
-    for (TransitiveDeps *dep : deps) {
-      for (auto &namedValue : dep->deps)
-        bucket.insert(namedValue);
+  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>> Buckets(
+      Bucketing.size());
+  for (auto [Deps, Bucket] : llvm::zip(Bucketing, Buckets)) {
+    for (TransitiveDeps *Dep : Deps) {
+      for (auto &NamedValue : Dep->Deps)
+        Bucket.insert(NamedValue);
     }
   }
 
   SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned> *>
-      setsToProcess;
-  setsToProcess.reserve(buckets.size() + transitiveDeps.size());
+      SetsToProcess;
+  SetsToProcess.reserve(Buckets.size() + TransDeps.size());
 
   // Clone each mutable global bucket into its own module.
-  for (auto &bucket : buckets) {
-    if (bucket.empty())
+  for (auto &Bucket : Buckets) {
+    if (Bucket.empty())
       continue;
-    setsToProcess.push_back(&bucket);
+    SetsToProcess.push_back(&Bucket);
   }
 
-  for (auto &[root, deps] : transitiveDeps) {
+  for (auto &[Root, Deps] : TransDeps) {
     // Skip values included in another transitive dependency set and values
     // included in mutable global sets.
-    if (!deps.mutIdx)
-      setsToProcess.push_back(&deps.deps);
+    if (!Deps.MutIdx)
+      SetsToProcess.push_back(&Deps.Deps);
   }
 
-  if (setsToProcess.size() <= 1)
-    return processFn(forwardModule(std::move(mainModule)), std::nullopt,
+  if (SetsToProcess.size() <= 1)
+    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
                      /*numFunctionBase=*/0);
 
   // Sort the sets by to schedule the larger modules first.
-  llvm::sort(setsToProcess,
-             [](auto *lhs, auto *rhs) { return lhs->size() > rhs->size(); });
+  llvm::sort(SetsToProcess,
+             [](auto *Lhs, auto *Rhs) { return Lhs->size() > Rhs->size(); });
 
   // Prepare to materialize slices of the module by first writing the main
   // module as bitcode to a shared buffer.
-  auto buf = WriteableBuffer::get();
+  auto Buf = WritableMemoryBuffer::getNewMemBuffer(size_t Size);
   {
-    CompilerTimeTraceScope traceScope("writeMainModuleBitcode");
-    llvm::Module &module = strtab->externalizeStrings(std::move(mainModule));
-    llvm::WriteBitcodeToFile(module, *buf);
+    llvm::Module &Module = Strtab->externalizeStrings(std::move(MainModule));
+    llvm::WriteBitcodeToFile(Module, *Buf);
   }
 
   unsigned numFunctions = 0;

>From a43cf9acc8ead7a7cd8ab0b922bdc7bcf78aa349 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Fri, 14 Mar 2025 16:48:24 -0400
Subject: [PATCH 05/17] Move source code around.

---
 llvm/include/llvm/IR/ModuleSplitter.h         |  78 ++
 .../llvm/ModuleSplitter/ModuleSplitter.h      |  78 ++
 llvm/lib/IR/ModuleSplitter.cpp                | 815 ++++++++++++++++++
 llvm/lib/ModuleSplitter/CMakeLists.txt        |  13 +
 llvm/lib/ModuleSplitter/ModuleSplitter.cpp    | 814 +++++++++++++++++
 5 files changed, 1798 insertions(+)
 create mode 100644 llvm/include/llvm/IR/ModuleSplitter.h
 create mode 100644 llvm/include/llvm/ModuleSplitter/ModuleSplitter.h
 create mode 100644 llvm/lib/IR/ModuleSplitter.cpp
 create mode 100644 llvm/lib/ModuleSplitter/CMakeLists.txt
 create mode 100644 llvm/lib/ModuleSplitter/ModuleSplitter.cpp

diff --git a/llvm/include/llvm/IR/ModuleSplitter.h b/llvm/include/llvm/IR/ModuleSplitter.h
new file mode 100644
index 0000000000000..912d8edb7c189
--- /dev/null
+++ b/llvm/include/llvm/IR/ModuleSplitter.h
@@ -0,0 +1,78 @@
+//===- ModuleSplitter.h - Module Splitter Functions -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Declares LLVMModuleAndContext and the module-splitting entry points
+/// splitPerAnchored() and splitPerFunction().
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_MODULESPLITTER_H
+#define LLVM_IR_MODULESPLITTER_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Error.h"
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// LLVMModuleAndContext
+//===----------------------------------------------------------------------===//
+
+/// A pair of an LLVM module and the LLVM context that holds ownership of the
+/// objects. This is a useful class for parallelizing LLVM and managing
+/// ownership of LLVM instances.
+class LLVMModuleAndContext {
+public:
+  /// Expose the underlying LLVM context to create the module. This is the only
+  /// way to access the LLVM context to prevent accidental sharing.
+  Expected<bool> create(
+      function_ref<Expected<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
+          CreateModule);
+  /// Access the owned module; only valid after a successful create().
+  llvm::Module &operator*() { return *Module; }
+  llvm::Module *operator->() { return Module.get(); }
+  /// Release the module and the context owned by this object.
+  void reset();
+
+private:
+  /// LLVM context stored in a unique pointer so that we can move this type.
+  std::unique_ptr<llvm::LLVMContext> Ctx =
+      std::make_unique<llvm::LLVMContext>();
+  /// The paired LLVM module.
+  std::unique_ptr<llvm::Module> Module;
+};
+
+//===----------------------------------------------------------------------===//
+// Module Splitter
+//===----------------------------------------------------------------------===//
+/// Per-split callback: (module factory, optional split index, function base).
+using LLVMSplitProcessFn =
+    function_ref<void(llvm::unique_function<LLVMModuleAndContext()>,
+                      std::optional<int64_t>, unsigned)>;
+
+/// Helper to create a lambda that just forwards a preexisting Module.
+inline llvm::unique_function<LLVMModuleAndContext()>
+forwardModule(LLVMModuleAndContext &&Module) {
+  return [Module = std::move(Module)]() mutable { return std::move(Module); };
+}
+
+/// Support for splitting an LLVM module into multiple parts using anchored
+/// functions (e.g. exported functions), pulling every dependency on the
+/// call stack into one module.
+void splitPerAnchored(LLVMModuleAndContext Module,
+                      LLVMSplitProcessFn ProcessFn,
+                      llvm::SmallVectorImpl<llvm::Function> &Anchors);
+
+/// Support for splitting an LLVM module into multiple parts, each of which
+/// contains only one function.
+void splitPerFunction(
+    LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn);
+
+} // namespace llvm
+
+#endif // LLVM_IR_MODULESPLITTER_H
diff --git a/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h b/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h
new file mode 100644
index 0000000000000..912d8edb7c189
--- /dev/null
+++ b/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h
@@ -0,0 +1,78 @@
+//===- ModuleSplitter.h - Module Splitter Functions -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Declares LLVMModuleAndContext and the module-splitting entry points
+/// splitPerAnchored() and splitPerFunction().
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MODULESPLITTER_MODULESPLITTER_H
+#define LLVM_MODULESPLITTER_MODULESPLITTER_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Error.h"
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// LLVMModuleAndContext
+//===----------------------------------------------------------------------===//
+
+/// A pair of an LLVM module and the LLVM context that holds ownership of the
+/// objects. This is a useful class for parallelizing LLVM and managing
+/// ownership of LLVM instances.
+class LLVMModuleAndContext {
+public:
+  /// Expose the underlying LLVM context to create the module. This is the only
+  /// way to access the LLVM context to prevent accidental sharing.
+  Expected<bool> create(
+      function_ref<Expected<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
+          CreateModule);
+  /// Access the owned module; only valid after a successful create().
+  llvm::Module &operator*() { return *Module; }
+  llvm::Module *operator->() { return Module.get(); }
+  /// Release the module and the context owned by this object.
+  void reset();
+
+private:
+  /// LLVM context stored in a unique pointer so that we can move this type.
+  std::unique_ptr<llvm::LLVMContext> Ctx =
+      std::make_unique<llvm::LLVMContext>();
+  /// The paired LLVM module.
+  std::unique_ptr<llvm::Module> Module;
+};
+
+//===----------------------------------------------------------------------===//
+// Module Splitter
+//===----------------------------------------------------------------------===//
+/// Per-split callback: (module factory, optional split index, function base).
+using LLVMSplitProcessFn =
+    function_ref<void(llvm::unique_function<LLVMModuleAndContext()>,
+                      std::optional<int64_t>, unsigned)>;
+
+/// Helper to create a lambda that just forwards a preexisting Module.
+inline llvm::unique_function<LLVMModuleAndContext()>
+forwardModule(LLVMModuleAndContext &&Module) {
+  return [Module = std::move(Module)]() mutable { return std::move(Module); };
+}
+
+/// Support for splitting an LLVM module into multiple parts using anchored
+/// functions (e.g. exported functions), pulling every dependency on the
+/// call stack into one module.
+void splitPerAnchored(LLVMModuleAndContext Module,
+                      LLVMSplitProcessFn ProcessFn,
+                      llvm::SmallVectorImpl<llvm::Function> &Anchors);
+
+/// Support for splitting an LLVM module into multiple parts, each of which
+/// contains only one function.
+void splitPerFunction(
+    LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn);
+
+} // namespace llvm
+
+#endif // LLVM_MODULESPLITTER_MODULESPLITTER_H
diff --git a/llvm/lib/IR/ModuleSplitter.cpp b/llvm/lib/IR/ModuleSplitter.cpp
new file mode 100644
index 0000000000000..1778c0d4a2278
--- /dev/null
+++ b/llvm/lib/IR/ModuleSplitter.cpp
@@ -0,0 +1,815 @@
+//===--- ModuleSplitter.cpp - Module Splitter -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ModuleSplitter.h"
+
+#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "llvm-module-split"
+
+//===----------------------------------------------------------------------===//
+// LLVMModuleAndContext
+//===----------------------------------------------------------------------===//
+
+/// Build the module inside the context owned by this object. `CreateModule`
+/// receives that context and returns either the new module or an error. An
+/// error is forwarded to the caller; otherwise the module is stored and `true`
+/// is returned.
+Expected<bool> LLVMModuleAndContext::create(
+    function_ref<Expected<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
+        CreateModule) {
+  assert(!Module && "already have a module");
+  Expected<std::unique_ptr<llvm::Module>> Created = CreateModule(*Ctx);
+  if (!Created)
+    return Created.takeError();
+
+  Module = std::move(Created.get());
+  return true;
+}
+
+/// Release the module and then the context, in that order.
+void LLVMModuleAndContext::reset() {
+  Module = nullptr;
+  Ctx = nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// StringConstantTable
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Large strings are encoded very inefficiently in LLVM bitcode (each `char`
+/// is encoded as a `uint64_t`). The LLVM bitcode reader also reads strings
+/// back very inefficiently, ultimately performing 3 copies of the data. This
+/// is made worse by the fact that `getLazyBitcodeModule` does not lazily parse
+/// constants from the LLVM bitcode. Thus, when per-function splitting a module
+/// with N functions and M large string constants, we form 3*M*N copies of the
+/// large strings.
+///
+/// This class is part of a workaround for this inefficiency. When processing
+/// a module for splitting, we track any string global constants and their
+/// indices in this table. If a module is going to be roundtripped through
+/// bitcode to be lazily loaded, we externalize the strings by setting the
+/// corresponding constants to `zeroinitializer` in the module before it is
+/// written to bitcode. As we materialize constants on the other side, we check
+/// for a materialized global variable that matches an entry in the string
+/// table and directly copy the data over into the new LLVM context.
+///
+/// We can generalize this optimization to other large data types as necessary.
+///
+/// The table is reference counted (with a thread-safe count) so that it can be
+/// shared through an `IntrusiveRefCntPtr`.
+class StringConstantTable
+    : public ThreadSafeRefCountedBase<StringConstantTable> {
+  /// One recorded string constant: the module index of the global variable,
+  /// the variable itself, and a view of its string data. Because the string
+  /// data is owned by the original LLVM context, that context has to stay
+  /// alive.
+  struct Entry {
+    unsigned Idx;
+    const llvm::GlobalVariable *Var;
+    StringRef Value;
+  };
+
+public:
+  /// Record the data of `Value` under index `GvIdx` if `Value` is an internal
+  /// constant global variable initialized with a C string.
+  void recordIfStringConstant(unsigned GvIdx, const llvm::GlobalValue &Value) {
+    const auto *GV = dyn_cast<llvm::GlobalVariable>(&Value);
+    if (!GV || !GV->isConstant() || !GV->hasInternalLinkage())
+      return;
+
+    const auto *Data =
+        dyn_cast<llvm::ConstantDataSequential>(GV->getInitializer());
+    if (!Data || !Data->isCString())
+      return;
+
+    StringConstants.push_back(Entry{GvIdx, GV, Data->getAsString()});
+  }
+
+  /// Take ownership of the main module (so the recorded string data stays
+  /// alive) and replace the initializer of every recorded string constant
+  /// with a null value. Meant to be called before the module is written to
+  /// bitcode. Returns the module that is now owned by the table.
+  llvm::Module &externalizeStrings(LLVMModuleAndContext &&Module) {
+    MainModule = std::move(Module);
+    for (const Entry &Recorded : StringConstants) {
+      // Casting away constness is OK because we own the module now.
+      auto *Var = const_cast<llvm::GlobalVariable *>(Recorded.Var);
+      // The global variable is an internal constant, so it must have an
+      // initializer whose type we can reuse for the stub.
+      auto *Stub =
+          llvm::Constant::getNullValue(Var->getInitializer()->getType());
+      Var->setInitializer(Stub);
+    }
+    return *MainModule;
+  }
+
+  /// A forward-only cursor over the entries in the string table.
+  class Injector {
+    using const_iterator = std::vector<Entry>::const_iterator;
+
+  public:
+    /// If the global variable `Var` of a materialized module, located at
+    /// index `GvIdx`, is a string constant found in the table, copy the data
+    /// over into the new LLVM context and set the initializer.
+    void materializeIfStringConstant(unsigned GvIdx,
+                                     llvm::GlobalVariable &Var) {
+      // Skip the entries that belong to globals preceding `GvIdx`.
+      for (; It != Et && It->Idx < GvIdx; ++It)
+        ;
+
+      const bool Found = It != Et && It->Idx == GvIdx;
+      if (!Found)
+        return;
+
+      llvm::LLVMContext &Ctx = Var.getType()->getContext();
+      Var.setInitializer(
+          llvm::ConstantDataArray::getString(Ctx, It->Value,
+                                             /*AddNull=*/false));
+    }
+
+  private:
+    explicit Injector(const_iterator It, const_iterator Et) : It(It), Et(Et) {}
+
+    const_iterator It, Et;
+
+    friend class StringConstantTable;
+  };
+
+  /// Create a cursor positioned at the first recorded entry.
+  Injector begin() const {
+    return Injector(StringConstants.begin(), StringConstants.end());
+  }
+
+private:
+  /// The recorded string constants, in the order they were recorded.
+  std::vector<Entry> StringConstants;
+  /// The module the recorded data belongs to, once it has been handed over.
+  LLVMModuleAndContext MainModule;
+};
+
+//===----------------------------------------------------------------------===//
+// Module Splitter
+//===----------------------------------------------------------------------===//
+
+class LLVMModuleSplitterImpl {
+public:
+  explicit LLVMModuleSplitterImpl(LLVMModuleAndContext Module)
+      : MainModule(std::move(Module)) {}
+
+  /// Split the main module into multiple modules, passing a factory for each
+  /// piece to `ProcessFn`.
+  void split(LLVMSplitProcessFn ProcessFn,
+             llvm::SmallVectorImpl<llvm::Function> &Anchors);
+
+private:
+  struct ValueInfo {
+    /// The immediate global value dependencies of a value.
+    SmallVector<const llvm::GlobalValue *> Dependencies;
+    /// The index of this global value in the module. We use this to
+    /// materialize global values from bitcode.
+    unsigned GvIdx;
+  };
+
+  struct TransitiveDeps {
+    /// The transitive dependencies, each mapped to its module index.
+    llvm::MapVector<const llvm::GlobalValue *, unsigned> Deps;
+    /// True if computation is complete.
+    bool Complete = false;
+    /// The index of the bucket this set was merged into, if any.
+    std::optional<unsigned> MutIdx;
+  };
+
+  /// Record `Orig` as an immediate dependency of every function or global
+  /// value that uses `Value`. `Value` differs from `Orig` when recursing
+  /// through the constant users of `Orig`.
+  void collectImmediateDependencies(const llvm::Value *Value,
+                                    const llvm::GlobalValue *Orig);
+
+  /// The main LLVM module being split.
+  LLVMModuleAndContext MainModule;
+
+  /// The value info for each global value in the module.
+  llvm::DenseMap<const llvm::Value *, ValueInfo> Infos;
+
+  /// The transitive dependencies of each global value.
+  llvm::MapVector<const llvm::GlobalValue *, TransitiveDeps> TransDeps;
+
+  /// Users of split "anchors". These are global values where we don't want
+  /// their users to be split into different modules because it will cause the
+  /// symbol to be duplicated.
+  llvm::MapVector<const llvm::GlobalValue *, llvm::SetVector<TransitiveDeps *>>
+      SplitAnchorUsers;
+};
+} // namespace
+
+/// Lazily read the bitcode in `Buf` into a fresh context and materialize only
+/// the global variables and functions whose module indices appear in `Set`.
+/// Every other global variable or function is turned into an external
+/// declaration, and declarations without uses are erased afterwards. String
+/// constants recorded in `Strtab` are re-created in the new context for the
+/// global variables that are kept.
+///
+/// A failure to read or materialize the bitcode is treated as fatal.
+static LLVMModuleAndContext readAndMaterializeDependencies(
+    MemoryBufferRef &Buf,
+    const llvm::MapVector<const llvm::GlobalValue *, unsigned> &Set,
+    const StringConstantTable &Strtab) {
+
+  // First, create a lazy module with an internal bitcode materializer.
+  // TODO: Not sure how to make lazy loading metadata work.
+  LLVMModuleAndContext Result;
+  // An `Expected` has to be checked even when it holds a value, so unwrap the
+  // result with `cantFail` instead of discarding it through a `(void)` cast.
+  (void)llvm::cantFail(Result.create(
+      [&](llvm::LLVMContext &Ctx) -> Expected<std::unique_ptr<Module>> {
+        return llvm::getLazyBitcodeModule(Buf, Ctx,
+                                          /*ShouldLazyLoadMetadata=*/false);
+      }));
+  Result->setModuleInlineAsm("");
+
+  SmallVector<unsigned> SortIndices =
+      llvm::to_vector(llvm::make_second_range(Set));
+  llvm::sort(SortIndices);
+  auto *IdxIt = SortIndices.begin();
+  auto *IdxEnd = SortIndices.end();
+
+  // The global value indices go from globals, functions, then aliases. This
+  // mirrors the order in which global values are deleted by LLVM's GlobalDCE.
+  unsigned CurIdx = 0;
+
+  // Returns true, consuming the index, if the global value at `CurIdx` was
+  // requested. This relies on the indices being sorted and on the global
+  // values being visited in increasing index order.
+  auto IsRequested = [&]() {
+    if (IdxIt == IdxEnd || *IdxIt != CurIdx)
+      return false;
+    ++IdxIt;
+    return true;
+  };
+
+  // Finish turning a global value that is not part of this split into an
+  // external declaration. An external symbol should not be DSOLocal anymore,
+  // otherwise position independent code generates `R_X86_64_PC32` instead of
+  // `R_X86_64_REX_GOTPCRELX` for these symbols and building a shared library
+  // from a static archive of this module will error with an `fPIC` confusion.
+  auto MakeExternal = [](llvm::GlobalObject &GO) {
+    GO.setComdat(nullptr);
+    GO.setLinkage(llvm::GlobalValue::ExternalLinkage);
+    GO.setDSOLocal(false);
+  };
+
+  StringConstantTable::Injector It = Strtab.begin();
+  // We need to keep the IR "valid" for the verifier because `materializeAll`
+  // may invoke it. It doesn't matter since we're deleting the globals anyway.
+  for (llvm::GlobalVariable &Global : Result->globals()) {
+    if (IsRequested()) {
+      llvm::cantFail(Global.materialize());
+      It.materializeIfStringConstant(CurIdx, Global);
+    } else {
+      Global.setInitializer(nullptr);
+      MakeExternal(Global);
+    }
+    ++CurIdx;
+  }
+  for (llvm::Function &Func : Result->functions()) {
+    if (IsRequested()) {
+      llvm::cantFail(Func.materialize());
+    } else {
+      Func.deleteBody();
+      MakeExternal(Func);
+    }
+    ++CurIdx;
+  }
+
+  // Finalize materialization of the module.
+  llvm::cantFail(Result->materializeAll());
+
+  // Now that the module is materialized, we can start deleting stuff. Just
+  // delete declarations with no uses.
+  for (llvm::GlobalVariable &Global :
+       llvm::make_early_inc_range(Result->globals())) {
+    if (Global.isDeclaration() && Global.use_empty())
+      Global.eraseFromParent();
+  }
+  for (llvm::Function &Func : llvm::make_early_inc_range(Result->functions())) {
+    if (Func.isDeclaration() && Func.use_empty())
+      Func.eraseFromParent();
+  }
+  return Result;
+}
+
+/// Support for splitting an LLVM module into multiple parts using exported
+/// functions as anchors, pulling all dependencies on the call stack into one
+/// module.
+///
+/// The definition is qualified with `llvm::` so that it implements the
+/// function declared in the header. An unqualified definition following
+/// `using namespace llvm;` would instead introduce a new, unrelated function
+/// in the global namespace and leave the declared one undefined.
+void llvm::splitPerAnchored(LLVMModuleAndContext Module,
+                            LLVMSplitProcessFn ProcessFn,
+                            llvm::SmallVectorImpl<llvm::Function> &Anchors) {
+  LLVMModuleSplitterImpl Impl(std::move(Module));
+  Impl.split(ProcessFn, Anchors);
+}
+
+void LLVMModuleSplitterImpl::split(
+    LLVMSplitProcessFn ProcessFn,
+    llvm::SmallVectorImpl<llvm::Function> &Anchors) {
+  // The use-def list is sparse. Use it to build a sparse dependency graph
+  // between global values.
+  IntrusiveRefCntPtr<StringConstantTable> Strtab(new StringConstantTable());
+  unsigned GvIdx = 0;
+
+  auto ComputeDeps = [&](const llvm::GlobalValue &value) {
+    Strtab->recordIfStringConstant(GvIdx, value);
+    Infos[&value].GvIdx = GvIdx++;
+    collectImmediateDependencies(&value, &value);
+  };
+  // NOTE: The visitation of globals then functions has to line up with
+  // `readAndMaterializeDependencies`.
+  for (const llvm::GlobalVariable &global : MainModule->globals()) {
+    ComputeDeps(global);
+    if (!global.hasInternalLinkage() && !global.hasPrivateLinkage())
+      TransDeps[&global];
+  }
+  for (const llvm::Function &Fn : MainModule->functions()) {
+    ComputeDeps(Fn);
+    if (!Fn.isDeclaration() && (Fn.hasExternalLinkage() || Fn.hasWeakLinkage()))
+      TransDeps[&Fn];
+  }
+
+  // If there is at most one exported global value, forward the main module
+  // unsplit.
+  if (TransDeps.size() <= 1)
+    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
+                     /*numFunctionBase=*/0);
+
+  // Now for each exported global value, compute the transitive set of
+  // dependencies using DFS.
+  SmallVector<const llvm::GlobalValue *> Worklist;
+  for (auto &[Value, Deps] : TransDeps) {
+    Worklist.clear();
+    Worklist.push_back(Value);
+    while (!Worklist.empty()) {
+      const llvm::GlobalValue *It = Worklist.pop_back_val();
+
+      auto [iter, inserted] = Deps.Deps.insert({It, -1});
+      if (!inserted) {
+        // Already visited.
+        continue;
+      }
+      // Only look up the value info for values that were not visited before.
+      const ValueInfo &Info = Infos.at(It);
+      iter->second = Info.GvIdx;
+
+      // If this value depends on another value that is going to be split, we
+      // don't want to duplicate the symbol. Keep all the users together.
+      if (It != Value) {
+        if (auto* DepIt = TransDeps.find(It);
+            DepIt != TransDeps.end()) {
+          auto &Users = SplitAnchorUsers[It];
+          Users.insert(&Deps);
+          // Make sure to include the other value in its own user list.
+          Users.insert(&DepIt->second);
+          // We don't have to recurse since the subgraph will get processed.
+          continue;
+        }
+      }
+
+      // If this value depends on a mutable global, keep track of it. We have to
+      // put all users of a mutable global in the same module.
+      if (auto *Global = dyn_cast<llvm::GlobalVariable>(It);
+          Global && !Global->isConstant())
+        SplitAnchorUsers[Global].insert(&Deps);
+
+      // Recurse on dependencies.
+      llvm::append_range(Worklist, Info.Dependencies);
+    }
+
+    Deps.Complete = true;
+  }
+
+  // For each mutable global, grab all the transitive users and put them in one
+  // module. If global A has user set A* and global B has user set B* where
+  // A* and B* have an empty intersection, all values in A* will be assigned 0
+  // and all values in B* will be assigned 1. If global C has user set C* that
+  // overlaps both A* and B*, it will overwrite both to 2.
+  SmallVector<SmallVector<TransitiveDeps *>> Bucketing(SplitAnchorUsers.size());
+  for (auto [CurMutIdx, Bucket, Users] :
+       llvm::enumerate(Bucketing, llvm::make_second_range(SplitAnchorUsers))) {
+    for (TransitiveDeps *Deps : Users) {
+      if (Deps->MutIdx && *Deps->MutIdx != CurMutIdx) {
+        auto &OtherBucket = Bucketing[*Deps->MutIdx];
+        for (TransitiveDeps *Other : OtherBucket) {
+          Bucket.push_back(Other);
+          Other->MutIdx = CurMutIdx;
+        }
+        OtherBucket.clear();
+        assert(*Deps->MutIdx == CurMutIdx);
+      } else {
+        Bucket.push_back(Deps);
+        Deps->MutIdx = CurMutIdx;
+      }
+    }
+  }
+
+  // Now that we have assigned buckets to each value, merge the transitive
+  // dependency sets of all values belonging to the same set.
+  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>> Buckets(
+      Bucketing.size());
+  for (auto [Deps, Bucket] : llvm::zip(Bucketing, Buckets)) {
+    for (TransitiveDeps *Dep : Deps) {
+      for (auto &NamedValue : Dep->Deps)
+        Bucket.insert(NamedValue);
+    }
+  }
+
+  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned> *>
+      SetsToProcess;
+  SetsToProcess.reserve(Buckets.size() + TransDeps.size());
+
+  // Schedule each non-empty merged bucket as its own module.
+  for (auto &Bucket : Buckets) {
+    if (Bucket.empty())
+      continue;
+    SetsToProcess.push_back(&Bucket);
+  }
+
+  for (auto &[Root, Deps] : TransDeps) {
+    // Skip values included in another transitive dependency set and values
+    // included in mutable global sets.
+    if (!Deps.MutIdx)
+      SetsToProcess.push_back(&Deps.Deps);
+  }
+
+  if (SetsToProcess.size() <= 1)
+    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
+                     /*numFunctionBase=*/0);
+
+  // Sort the sets by size to schedule the larger modules first.
+  llvm::sort(SetsToProcess,
+             [](auto *Lhs, auto *Rhs) { return Lhs->size() > Rhs->size(); });
+
+  // Write the main module as bitcode to a buffer shared by all the factories.
+  // NOTE(review): factories only reference `Buf`; must they run before return?
+  std::string BufStr;
+  llvm::raw_string_ostream BufOS(BufStr);
+  {
+    llvm::Module &Module = Strtab->externalizeStrings(std::move(MainModule));
+    llvm::WriteBitcodeToFile(Module, BufOS);
+  }
+
+  auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(BufStr.size());
+  memcpy(Buf->getBufferStart(), BufStr.c_str(), BufStr.size());
+
+  unsigned NumFunctions = 0;
+  for (auto [Idx, Set] : llvm::enumerate(SetsToProcess)) {
+    unsigned Next = NumFunctions + Set->size();
+    auto MakeModule =
+        [Set = std::move(*Set),
+         Buf = MemoryBufferRef((*Buf).MemoryBuffer::getBuffer(), ""),
+         Strtab = Strtab]() mutable {
+          return readAndMaterializeDependencies(Buf, Set, *Strtab);
+        };
+    ProcessFn(std::move(MakeModule), Idx, NumFunctions);
+    NumFunctions = Next;
+  }
+}
+
+/// Record `Orig` as an immediate dependency of every function or global value
+/// that uses `Value`. Users that are plain constants are looked through by
+/// recursing into their own users.
+void LLVMModuleSplitterImpl::collectImmediateDependencies(
+    const llvm::Value *Value, const llvm::GlobalValue *Orig) {
+  for (const llvm::Value *User : Value->users()) {
+    // An instruction makes its enclosing function depend on `Orig`.
+    if (const auto *Inst = dyn_cast<llvm::Instruction>(User)) {
+      const llvm::Function *Parent = Inst->getParent()->getParent();
+      Infos[Parent].Dependencies.push_back(Orig);
+      continue;
+    }
+
+    // A global value user depends on `Orig` directly.
+    if (const auto *GV = dyn_cast<llvm::GlobalValue>(User)) {
+      Infos[GV].Dependencies.push_back(Orig);
+      continue;
+    }
+
+    // Anything else has to be a pure constant, which we look through.
+    if (!isa<llvm::Constant>(User))
+      llvm_unreachable("unexpected user of global value");
+    collectImmediateDependencies(User, Orig);
+  }
+}
+
+namespace {
+/// This class provides support for splitting an LLVM module into multiple
+/// parts at per-function granularity.
+/// TODO: Clean up the splitters here (some code duplication) when we can move
+/// to per function llvm compilation.
+class LLVMModulePerFunctionSplitterImpl {
+public:
+  /// Take ownership of the module to split.
+  explicit LLVMModulePerFunctionSplitterImpl(LLVMModuleAndContext Module)
+      : MainModule(std::move(Module)) {}
+
+  /// Split the main module into multiple modules, passing a factory for each
+  /// piece to `ProcessFn`. The original linkage of every internal or private
+  /// symbol whose linkage is changed is recorded in `SymbolLinkageTypes`.
+  /// `NumFunctionBase` seeds the names given to unnamed globals and is
+  /// forwarded to `ProcessFn` when the module is not split.
+  void
+  split(LLVMSplitProcessFn ProcessFn,
+        llvm::StringMap<llvm::GlobalValue::LinkageTypes> &SymbolLinkageTypes,
+        unsigned NumFunctionBase);
+
+private:
+  struct ValueInfo {
+    /// The global value this info describes; set while propagating use info.
+    const llvm::Value *Value = nullptr;
+    /// False for mutable global variables and for values that use one.
+    bool CanBeSplit = true;
+    /// The global values that have to accompany this value in its split.
+    llvm::SmallPtrSet<const llvm::GlobalValue *, 4> Dependencies;
+    /// The immediate global value users of this value.
+    llvm::SmallPtrSet<const llvm::GlobalValue *, 4> Users;
+    /// The index of this global value in the module. We use this to
+    /// materialize global values from bitcode.
+    unsigned GvIdx = 0;
+    /// True if the value has no users other than itself.
+    bool UserEmpty = true;
+  };
+
+  /// Collect all of the immediate global value users of `Value`.
+  void collectValueUsers(const llvm::GlobalValue *Value);
+
+  /// Propagate use information through the module.
+  void propagateUseInfo();
+
+  /// The main LLVM module being split.
+  LLVMModuleAndContext MainModule;
+
+  /// The value info for each global value in the module.
+  llvm::MapVector<const llvm::GlobalValue *, ValueInfo> ValueInfos;
+};
+} // namespace
+
+namespace llvm {
+/// Support for splitting an LLVM module into multiple parts, where each part
+/// contains only one function (with an exception for coroutine related
+/// functions).
+///
+/// The definition lives inside `namespace llvm` on purpose: a plain definition
+/// following `using namespace llvm;` would create a function in the global
+/// namespace rather than `llvm::splitPerFunction`.
+void splitPerFunction(
+    LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
+    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &SymbolLinkageTypes,
+    unsigned NumFunctionBase) {
+  LLVMModulePerFunctionSplitterImpl Impl(std::move(Module));
+  Impl.split(ProcessFn, SymbolLinkageTypes, NumFunctionBase);
+}
+} // namespace llvm
+
+/// Split the main module at per-function granularity, handing a factory for
+/// each resulting piece to `ProcessFn`.
+void LLVMModulePerFunctionSplitterImpl::split(
+    LLVMSplitProcessFn ProcessFn,
+    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &SymbolLinkageTypes,
+    unsigned NumFunctionBase) {
+  // Compute the value info for each global in the module.
+  // NOTE: The visitation of globals then functions has to line up with
+  // `readAndMaterializeDependencies`.
+  IntrusiveRefCntPtr<StringConstantTable> Strtab(new StringConstantTable());
+  unsigned GvIdx = 0;
+  auto ComputeUsers = [&](const llvm::GlobalValue &Value) {
+    Strtab->recordIfStringConstant(GvIdx, Value);
+    ValueInfos[&Value].GvIdx = GvIdx++;
+    collectValueUsers(&Value);
+  };
+  llvm::for_each(MainModule->globals(), ComputeUsers);
+  llvm::for_each(MainModule->functions(), ComputeUsers);
+
+  // With use information collected, propagate it to the dependencies.
+  propagateUseInfo();
+
+  // Now we can split the module.
+  // We split the module per function, cloning any necessary dependencies:
+  // - For function dependencies, only clone the declaration unless it is
+  //   coroutine related.
+  // - For other internal values, clone as is.
+  // This is a much finer-grained split, which enables significantly higher
+  // levels of parallelism (and smaller generated artifacts).
+  // LLVM LTO style optimization may suffer a bit here since we don't have
+  // the full callstack present anymore in each cloned module.
+  llvm::DenseSet<const llvm::Value *> SplitValues;
+  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>>
+      SetsToProcess;
+
+  // Hoist these collections to re-use memory allocations.
+  llvm::ValueToValueMapTy ValueMap;
+  SmallPtrSet<const llvm::Value *, 4> SplitDeps;
+  auto SplitValue = [&](const llvm::GlobalValue *Root) {
+    // If the function is already split, e.g. if it was a dependency of
+    // another function, skip it.
+    if (SplitValues.count(Root))
+      return;
+
+    auto &ValueInfo = ValueInfos[Root];
+    ValueMap.clear();
+    SplitDeps.clear();
+    auto ShouldSplit = [&](const llvm::GlobalValue *GlobalVal,
+                           const struct ValueInfo &Info) {
+      // Only clone root and the declaration of its dependencies.
+      if (GlobalVal == Root) {
+        SplitDeps.insert(GlobalVal);
+        return true;
+      }
+
+      if ((Info.CanBeSplit || Info.UserEmpty) &&
+          isa_and_nonnull<llvm::Function>(GlobalVal))
+        return false;
+
+      if (ValueInfo.Dependencies.contains(GlobalVal)) {
+        SplitDeps.insert(GlobalVal);
+        return true;
+      }
+
+      return false;
+    };
+
+    auto &Set = SetsToProcess.emplace_back();
+    for (auto &[GlobalVal, Info] : ValueInfos) {
+      if (ShouldSplit(GlobalVal, Info))
+        Set.insert({GlobalVal, Info.GvIdx});
+    }
+    if (Set.empty())
+      SetsToProcess.pop_back();
+
+    // Record the split values.
+    SplitValues.insert(SplitDeps.begin(), SplitDeps.end());
+  };
+
+  [[maybe_unused]] int64_t Count = 0;
+  SmallVector<const llvm::GlobalValue *> ToSplit;
+  unsigned UnnamedGlobal = NumFunctionBase;
+
+  for (auto &Global : MainModule->globals()) {
+    if (Global.hasInternalLinkage() || Global.hasPrivateLinkage()) {
+      if (!Global.hasName()) {
+        // Give unnamed GlobalVariables a unique name so that MCLink does not
+        // get confused when naming them while generating linked code, since
+        // the IR values can differ between splits (for the X86 backend).
+        // ASan builds insert these unnamed GlobalVariables.
+        Global.setName("__llvm_split_unnamed" + Twine(UnnamedGlobal++));
+      }
+
+      SymbolLinkageTypes.insert({Global.getName().str(), Global.getLinkage()});
+      Global.setLinkage(llvm::GlobalValue::WeakAnyLinkage);
+      continue;
+    }
+
+    if (Global.hasExternalLinkage())
+      continue;
+
+    // TODO: Add special handling for `llvm.global_ctors` and
+    // `llvm.global_dtors`, because otherwise they end up tying almost all
+    // symbols into the same split.
+    LLVM_DEBUG(llvm::dbgs()
+                   << (Count++) << ": split global: " << Global << "\n";);
+    ToSplit.emplace_back(&Global);
+  }
+
+  for (auto &Fn : MainModule->functions()) {
+    if (Fn.isDeclaration())
+      continue;
+
+    ValueInfo &Info = ValueInfos[&Fn];
+    if (Fn.hasInternalLinkage() || Fn.hasPrivateLinkage()) {
+      // Avoid renaming when linking in MCLink.
+      SymbolLinkageTypes.insert({Fn.getName().str(), Fn.getLinkage()});
+      Fn.setLinkage(llvm::Function::LinkageTypes::WeakAnyLinkage);
+    }
+
+    if (Info.CanBeSplit || Info.UserEmpty) {
+      LLVM_DEBUG(llvm::dbgs()
+                     << (Count++) << ": split fn: " << Fn.getName() << "\n";);
+      ToSplit.emplace_back(&Fn);
+    }
+  }
+
+  // Run this now since we just changed the linkages.
+  for (const llvm::GlobalValue *Value : ToSplit)
+    SplitValue(Value);
+
+  if (SetsToProcess.size() <= 1)
+    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
+                     NumFunctionBase);
+
+  // Prepare to materialize slices of the module by first writing the main
+  // module as bitcode to a shared buffer.
+  std::string BufStr;
+  llvm::raw_string_ostream BufOS(BufStr);
+  {
+    llvm::Module &Module = Strtab->externalizeStrings(std::move(MainModule));
+    llvm::WriteBitcodeToFile(Module, BufOS);
+  }
+
+  auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(BufStr.size());
+  memcpy(Buf->getBufferStart(), BufStr.c_str(), BufStr.size());
+  unsigned NumFunctions = 0;
+  for (auto [Idx, Set] : llvm::enumerate(SetsToProcess)) {
+    unsigned Next = NumFunctions + Set.size();
+    // Give each function a unique ID across all splits for proper MC level
+    // linking and codegen into one object file, where functions duplicated
+    // across the splits will be deduplicated (with the linking).
+    auto MakeModule =
+        [Set = std::move(Set),
+         Buf = MemoryBufferRef((*Buf).MemoryBuffer::getBuffer(), ""),
+         Strtab = Strtab]() mutable {
+          return readAndMaterializeDependencies(Buf, Set, *Strtab);
+        };
+    ProcessFn(std::move(MakeModule), Idx, NumFunctions);
+    NumFunctions = Next;
+  }
+}
+
+/// Gather the functions and global values that immediately use `Value`,
+/// looking through users that are plain constants, and record them in the
+/// value info of `Value`. Also mark mutable global variables as unsplittable.
+void LLVMModulePerFunctionSplitterImpl::collectValueUsers(
+    const llvm::GlobalValue *Value) {
+  // Register `UserGV` as a user of `Value` and make sure it has a value info
+  // entry of its own. The entry of `Value` is looked up again on every call
+  // instead of being cached across the insertion below.
+  auto RecordUser = [&](const llvm::GlobalValue *UserGV) {
+    ValueInfos[Value].Users.insert(UserGV);
+    ValueInfos[UserGV];
+  };
+
+  SmallVector<const llvm::User *> Pending(Value->users());
+  while (!Pending.empty()) {
+    const llvm::User *Current = Pending.pop_back_val();
+
+    // An instruction is attributed to the function that contains it.
+    if (const auto *Inst = dyn_cast<llvm::Instruction>(Current)) {
+      const llvm::Function *Parent = Inst->getParent()->getParent();
+      RecordUser(Parent);
+      continue;
+    }
+
+    // A global value is a user in its own right.
+    if (const auto *GV = dyn_cast<llvm::GlobalValue>(Current)) {
+      RecordUser(GV);
+      continue;
+    }
+
+    // Anything else has to be a pure constant; visit its users instead.
+    if (!isa<llvm::Constant>(Current))
+      llvm_unreachable("unexpected user of global value");
+    Pending.append(Current->user_begin(), Current->user_end());
+  }
+
+  // A global variable can only be split if it is constant; a mutable one
+  // can't be split.
+  if (const auto *Var = dyn_cast<llvm::GlobalVariable>(Value))
+    ValueInfos[Value].CanBeSplit = Var->isConstant();
+}
+
+/// Propagate dependency sets and splittability along the recorded use edges.
+void LLVMModulePerFunctionSplitterImpl::propagateUseInfo() {
+  std::vector<ValueInfo *> Worklist;
+
+  // Each value depends on itself. Seed the iteration with that.
+  for (auto &[Value, Info] : ValueInfos) {
+    if (auto Func = llvm::dyn_cast<llvm::Function>(Value)) {
+      if (Func->isDeclaration())
+        continue;
+    }
+
+    Info.Dependencies.insert(Value);
+    Info.Value = Value;
+    Worklist.push_back(&Info);
+    if (!Info.CanBeSplit) {
+      // If a value cannot be split, its users are also its dependencies.
+      llvm::set_union(Info.Dependencies, Info.Users);
+    }
+  }
+
+  while (!Worklist.empty()) {
+    ValueInfo *Info = Worklist.back();
+    Worklist.pop_back();
+
+    // Propagate the dependencies of this value to its users.
+    for (const llvm::GlobalValue *User : Info->Users) {
+      ValueInfo &UserInfo = ValueInfos.find(User)->second;
+      if (Info == &UserInfo)
+        continue;
+      bool Changed = false;
+
+      // Merge dependency to user if current value is not a function that will
+      // be split into a separate module.
+      bool MergeToUserDep = true;
+      if (llvm::isa_and_nonnull<llvm::Function>(Info->Value)) {
+        MergeToUserDep = !Info->CanBeSplit;
+      }
+
+      // If there is a change, add the user info to the worklist.
+      if (MergeToUserDep) {
+        if (llvm::set_union(UserInfo.Dependencies, Info->Dependencies))
+          Changed = true;
+      }
+
+      // If the value cannot be split, its users cannot be split either.
+      if (!Info->CanBeSplit && UserInfo.CanBeSplit) {
+        UserInfo.CanBeSplit = false;
+        Changed = true;
+        // If a value cannot be split, its users are also its dependencies.
+        llvm::set_union(UserInfo.Dependencies, UserInfo.Users);
+      }
+
+      if (Changed) {
+        UserInfo.Value = User;
+        Worklist.push_back(&UserInfo);
+      }
+    }
+
+    if (Info->CanBeSplit || isa_and_nonnull<llvm::GlobalValue>(Info->Value))
+      continue;
+
+    // Merge this unsplittable value's dependency set into each dependency.
+    // NOTE(review): every queued Info has a GlobalValue `Value`; reachable?
+    for (const llvm::GlobalValue *Dep : Info->Dependencies) {
+      ValueInfo &DepInfo = ValueInfos.find(Dep)->second;
+      if (Info == &DepInfo)
+        continue;
+      if (llvm::set_union(DepInfo.Dependencies, Info->Dependencies)) {
+        DepInfo.Value = Dep;
+        Worklist.push_back(&DepInfo);
+      }
+    }
+  }
+
+  for (auto &[Value, Info] : ValueInfos) {
+    Info.UserEmpty = Info.Users.empty() ||
+                     (Info.Users.size() == 1 && Info.Users.contains(Value));
+  }
+}
diff --git a/llvm/lib/ModuleSplitter/CMakeLists.txt b/llvm/lib/ModuleSplitter/CMakeLists.txt
new file mode 100644
index 0000000000000..cba910855cadd
--- /dev/null
+++ b/llvm/lib/ModuleSplitter/CMakeLists.txt
@@ -0,0 +1,13 @@
+add_llvm_component_library(LLVMModuleSplitter
+  ModuleSplitter.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/ModuleSplitter
+
+  LINK_COMPONENTS
+  Bitcode
+  Core
+  IR
+  IRReader
+  Support
+  )
diff --git a/llvm/lib/ModuleSplitter/ModuleSplitter.cpp b/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
new file mode 100644
index 0000000000000..68e4a12766d73
--- /dev/null
+++ b/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
@@ -0,0 +1,814 @@
+//===--- ModuleSplitter.cpp - Module Splitter -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/ModuleSplitter.h"
+
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "llvm-module-split"
+
+//===----------------------------------------------------------------------===//
+// LLVMModuleAndContext
+//===----------------------------------------------------------------------===//
+
+Expected<bool> LLVMModuleAndContext::create(
+    function_ref<Expected<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
+        CreateModule) {
+  assert(!Module && "already have a module");
+  auto ModuleOr = CreateModule(*Ctx); // Build the module in this context.
+  if (!ModuleOr) // Forward the failure; `Error` is move-only.
+    return ModuleOr.takeError();
+
+  Module = std::move(*ModuleOr);
+  return true;
+}
+
+void LLVMModuleAndContext::reset() {
+  Module.reset(); // Release the module before the context it was created in.
+  Ctx.reset();
+}
+
+//===----------------------------------------------------------------------===//
+// StringConstantTable
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Large strings are very inefficiently encoded in LLVM bitcode (each `char` is
+/// encoded as a `uint64_t`). The LLVM bitcode reader also reads strings back
+/// very inefficiently, performing 3 ultimate copies of the data. This is made
+/// worse because `getLazyBitcodeModule` does not lazily parse constants
+/// from the LLVM bitcode. Thus, when per-function splitting a module with N
+/// functions and M large string constants, we form 3*M*N copies of the large
+/// strings.
+///
+/// This class is part of a workaround for this inefficiency. When processing a
+/// module for splitting, we track any string global constants and their indices
+/// in this table. If a module is going to be roundtripped through bitcode to be
+/// lazily loaded, we externalize the strings by setting the corresponding
+/// constants to `zeroinitializer` in the module before it is written to
+/// bitcode. As we materialize constants on the other side, we check for a
+/// materialized global variable that matches an entry in the string table and
+/// directly copy the data over into the new LLVM context.
+///
+/// We can generalize this optimization to other large data types as necessary.
+///
+/// This class is held in an `IntrusiveRefCntPtr` to be shared across threads.
+class StringConstantTable
+    : public ThreadSafeRefCountedBase<StringConstantTable> {
+  /// An entry in the string table consists of a global variable, its module
+  /// index, and a reference to the string data. Because the string data is
+  /// owned by the original LLVM context, we have to ensure it stays alive.
+  struct Entry {
+    unsigned Idx;
+    const llvm::GlobalVariable *Var;
+    StringRef Value;
+  };
+
+public:
+  /// If `Value` denotes a string constant, record the data at index `GvIdx`.
+  void recordIfStringConstant(unsigned GvIdx, const llvm::GlobalValue &Value) {
+    auto Var = dyn_cast<llvm::GlobalVariable>(&Value);
+    if (Var && Var->isConstant() && Var->hasInternalLinkage()) {
+      auto *Init =
+          dyn_cast<llvm::ConstantDataSequential>(Var->getInitializer());
+      if (Init && Init->isCString())
+        StringConstants.push_back(Entry{GvIdx, Var, Init->getAsString()});
+    }
+  }
+
+  /// Before writing the main Module to bitcode, externalize large string
+  /// constants by stubbing out their values. Take ownership of the main Module
+  /// so the string data stays alive.
+  llvm::Module &externalizeStrings(LLVMModuleAndContext &&Module) {
+    MainModule = std::move(Module);
+    // Stub the initializers. The global variable is an internal constant, so it
+    // must have an initializer.
+    for (Entry &E : StringConstants) {
+      auto *Stub =
+          llvm::Constant::getNullValue(E.Var->getInitializer()->getType());
+      // `const_cast` is OK because we own the module now.
+      const_cast<llvm::GlobalVariable *>(E.Var)->setInitializer(Stub);
+    }
+    return *MainModule;
+  }
+
+  /// This is a forward-only cursor over the entries in the string table.
+  class Injector {
+    using const_iterator = std::vector<Entry>::const_iterator;
+
+  public:
+    /// Given a global variable in a materialized module and its index, if it is
+    /// a string constant found in the table, copy the data over into the new
+    /// LLVM context and set the initializer.
+    void materializeIfStringConstant(unsigned GvIdx,
+                                     llvm::GlobalVariable &Var) {
+      while (It != Et && It->Idx < GvIdx) // Skip entries for earlier globals.
+        ++It;
+      if (It == Et || It->Idx != GvIdx)
+        return;
+      Var.setInitializer(llvm::ConstantDataArray::getString(
+          Var.getType()->getContext(), It->Value, /*AddNull=*/false));
+    }
+
+  private:
+    explicit Injector(const_iterator It, const_iterator Et) : It(It), Et(Et) {}
+
+    const_iterator It, Et;
+
+    friend class StringConstantTable;
+  };
+
+  Injector begin() const {
+    return Injector(StringConstants.begin(), StringConstants.end());
+  }
+
+private:
+  std::vector<Entry> StringConstants;
+  LLVMModuleAndContext MainModule;
+};
+
+//===----------------------------------------------------------------------===//
+// Module Splitter
+//===----------------------------------------------------------------------===//
+
+class LLVMModuleSplitterImpl {
+public:
+  explicit LLVMModuleSplitterImpl(LLVMModuleAndContext Module)
+      : MainModule(std::move(Module)) {}
+
+  /// Split the main module into multiple modules, handing each piece to
+  /// `ProcessFn`.
+  void split(LLVMSplitProcessFn ProcessFn,
+             llvm::SmallVectorImpl<llvm::Function> &Anchors);
+
+private:
+  struct ValueInfo {
+    /// The immediate global value dependencies of a value.
+    SmallVector<const llvm::GlobalValue *> Dependencies;
+    /// The index of this global value in the module. We will use this to
+    /// materialize global values from bitcode.
+    unsigned GvIdx;
+  };
+
+  struct TransitiveDeps {
+    /// The transitive dependencies, each mapped to its `GvIdx`.
+    llvm::MapVector<const llvm::GlobalValue *, unsigned> Deps;
+    /// True if computation is complete.
+    bool Complete = false;
+    /// The bucket index assigned when grouping users of split anchors.
+    std::optional<unsigned> MutIdx;
+  };
+
+  /// Record `Orig` as an immediate dependency of each global value that uses
+  /// `Value`. `Orig` is the global value the walk started from; it differs
+  /// from `Value` when it is used through a constant.
+  void collectImmediateDependencies(const llvm::Value *Value,
+                                    const llvm::GlobalValue *Orig);
+
+  /// The main LLVM module being split.
+  LLVMModuleAndContext MainModule;
+
+  /// The value info for each global value in the module.
+  llvm::DenseMap<const llvm::Value *, ValueInfo> Infos;
+
+  /// The transitive dependencies of each global value.
+  llvm::MapVector<const llvm::GlobalValue *, TransitiveDeps> TransDeps;
+
+  /// Users of split "anchors". These are global values where we don't want
+  /// their users to be split into different modules because it will cause the
+  /// symbol to be duplicated.
+  llvm::MapVector<const llvm::GlobalValue *, llvm::SetVector<TransitiveDeps *>>
+      SplitAnchorUsers;
+};
+} // namespace
+
+static LLVMModuleAndContext readAndMaterializeDependencies(
+    MemoryBufferRef &Buf,
+    const llvm::MapVector<const llvm::GlobalValue *, unsigned> &Set,
+    const StringConstantTable &Strtab) {
+  // Materialize only the values whose indices are in `Set`; stub out the rest.
+  // First, create a lazy module with an internal bitcode materializer.
+  // TODO: Not sure how to make lazy loading metadata work.
+  LLVMModuleAndContext Result;
+  {
+    llvm::cantFail(Result.create(
+        [&](llvm::LLVMContext &Ctx) -> Expected<std::unique_ptr<Module>> {
+          return llvm::cantFail(
+              llvm::getLazyBitcodeModule(Buf, Ctx,
+                                         /*ShouldLazyLoadMetadata=*/false));
+        })); // An `Expected` must be consumed, even when it holds success.
+    Result->setModuleInlineAsm("");
+  }
+
+  SmallVector<unsigned> SortIndices =
+      llvm::to_vector(llvm::make_second_range(Set));
+  llvm::sort(SortIndices);
+  auto *IdxIt = SortIndices.begin();
+  auto *IdxEnd = SortIndices.end();
+
+  // The global value indices go from globals, functions, then aliases. This
+  // mirrors the order in which global values are deleted by LLVM's GlobalDCE.
+  unsigned CurIdx = 0;
+  StringConstantTable::Injector It = Strtab.begin();
+  // We need to keep the IR "valid" for the verifier because `materializeAll`
+  // may invoke it. It doesn't matter since we're deleting the globals anyway.
+  for (llvm::GlobalVariable &Global : Result->globals()) {
+    if (IdxIt != IdxEnd && CurIdx == *IdxIt) {
+      ++IdxIt;
+      llvm::cantFail(Global.materialize());
+      It.materializeIfStringConstant(CurIdx, Global);
+    } else {
+      Global.setInitializer(nullptr);
+      Global.setComdat(nullptr);
+      Global.setLinkage(llvm::GlobalValue::ExternalLinkage);
+      // A symbol with external linkage should no longer be DSOLocal.
+      // Otherwise position-independent code generation emits
+      // `R_X86_64_PC32` instead of `R_X86_64_REX_GOTPCRELX` relocations
+      // for these symbols, and building a shared library from a static
+      // archive of this module fails with an `fPIC` confusion.
+      Global.setDSOLocal(false);
+    }
+    ++CurIdx;
+  }
+  for (llvm::Function &Func : Result->functions()) {
+    if (IdxIt != IdxEnd && CurIdx == *IdxIt) {
+      ++IdxIt;
+      llvm::cantFail(Func.materialize());
+    } else {
+      Func.deleteBody();
+      Func.setComdat(nullptr);
+      Func.setLinkage(llvm::GlobalValue::ExternalLinkage);
+      // A symbol with external linkage should no longer be marked
+      // DSOLocal. Otherwise position-independent code generation emits
+      // `R_X86_64_PC32` instead of `R_X86_64_REX_GOTPCRELX` relocations
+      // for these symbols, and building a shared library from a static
+      // archive of this module fails with an `fPIC` confusion.
+      // The same adjustment is made for the stubbed-out global variables
+      // in the loop above.
+      Func.setDSOLocal(false);
+    }
+    ++CurIdx;
+  }
+
+  // Finalize materialization of the module.
+  llvm::cantFail(Result->materializeAll());
+
+  // Now that the module is materialized, we can start deleting stuff. Just
+  // delete declarations with no uses.
+  for (llvm::GlobalVariable &Global :
+       llvm::make_early_inc_range(Result->globals())) {
+    if (Global.isDeclaration() && Global.use_empty())
+      Global.eraseFromParent();
+  }
+  for (llvm::Function &Func : llvm::make_early_inc_range(Result->functions())) {
+    if (Func.isDeclaration() && Func.use_empty())
+      Func.eraseFromParent();
+  }
+  return Result;
+}
+
+/// Split an LLVM module into multiple parts using exported functions as
+/// anchors, pulling all of the dependencies on each anchor's call stack into
+/// the same module.
+void splitPerAnchored(LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
+                      llvm::SmallVectorImpl<llvm::Function> &Anchors) {
+  LLVMModuleSplitterImpl Impl(std::move(Module));
+  Impl.split(ProcessFn, Anchors);
+}
+
+void LLVMModuleSplitterImpl::split(
+    LLVMSplitProcessFn ProcessFn,
+    llvm::SmallVectorImpl<llvm::Function> &Anchors) { // NOTE: unused here.
+  // The use-def list is sparse. Use it to build a sparse dependency graph
+  // between global values.
+  IntrusiveRefCntPtr<StringConstantTable> Strtab(new StringConstantTable());
+  unsigned GvIdx = 0;
+
+  auto ComputeDeps = [&](const llvm::GlobalValue &value) {
+    Strtab->recordIfStringConstant(GvIdx, value);
+    Infos[&value].GvIdx = GvIdx++;
+    collectImmediateDependencies(&value, &value);
+  };
+  // NOTE: The visitation of globals then functions has to line up with
+  // `readAndMaterializeDependencies`.
+  for (const llvm::GlobalVariable &global : MainModule->globals()) {
+    ComputeDeps(global);
+    if (!global.hasInternalLinkage() && !global.hasPrivateLinkage())
+      TransDeps[&global]; // Seed an empty entry for this split root.
+  }
+  for (const llvm::Function &Fn : MainModule->functions()) {
+    ComputeDeps(Fn);
+    if (!Fn.isDeclaration() && (Fn.hasExternalLinkage() || Fn.hasWeakLinkage()))
+      TransDeps[&Fn]; // Seed an empty entry for this split root.
+  }
+
+  // If there is at most one exported global value to split around, forward
+  // the main module.
+  if (TransDeps.size() <= 1)
+    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
+                     /*numFunctionBase=*/0);
+
+  // Now for each export'd global value, compute the transitive set of
+  // dependencies using DFS.
+  SmallVector<const llvm::GlobalValue *> Worklist;
+  for (auto &[Value, Deps] : TransDeps) {
+    Worklist.clear();
+    Worklist.push_back(Value);
+    while (!Worklist.empty()) {
+      const llvm::GlobalValue *It = Worklist.pop_back_val();
+
+      auto [iter, inserted] = Deps.Deps.insert({It, -1});
+      if (!inserted) {
+        // Already visited.
+        continue;
+      }
+      // Pay the cost of the info lookup only for values not visited before.
+      const ValueInfo &Info = Infos.at(It);
+      iter->second = Info.GvIdx;
+
+      // If this value depends on another value that is going to be split, we
+      // don't want to duplicate the symbol. Keep all the users together.
+      if (It != Value) {
+        if (auto* DepIt = TransDeps.find(It);
+            DepIt != TransDeps.end()) {
+          auto &Users = SplitAnchorUsers[It];
+          Users.insert(&Deps);
+          // Make sure to include the other value in its own user list.
+          Users.insert(&DepIt->second);
+          // We don't have to recurse since the subgraph will get processed.
+          continue;
+        }
+      }
+
+      // If this value depends on a mutable global, keep track of it. We have to
+      // put all users of a mutable global in the same module.
+      if (auto *Global = dyn_cast<llvm::GlobalVariable>(It);
+          Global && !Global->isConstant())
+        SplitAnchorUsers[Global].insert(&Deps);
+
+      // Recurse on dependencies.
+      llvm::append_range(Worklist, Info.Dependencies);
+    }
+
+    Deps.Complete = true;
+  }
+
+  // For each mutable global, grab all the transitive users and put them in one
+  // module. If global A has user set A* and global B has user set B* where
+  // A* and B* have an empty intersection, all values in A* will be assigned 0
+  // and all values in B* will be assigned 1. If global C has user set C* that
+  // overlaps both A* and B*, it will overwrite both to 2.
+  SmallVector<SmallVector<TransitiveDeps *>> Bucketing(SplitAnchorUsers.size());
+  for (auto [CurMutIdx, Bucket, Users] :
+       llvm::enumerate(Bucketing, llvm::make_second_range(SplitAnchorUsers))) {
+    for (TransitiveDeps *Deps : Users) {
+      if (Deps->MutIdx && *Deps->MutIdx != CurMutIdx) {
+        auto &OtherBucket = Bucketing[*Deps->MutIdx];
+        for (TransitiveDeps *Other : OtherBucket) {
+          Bucket.push_back(Other);
+          Other->MutIdx = CurMutIdx;
+        }
+        OtherBucket.clear();
+        assert(*Deps->MutIdx == CurMutIdx);
+      } else {
+        Bucket.push_back(Deps);
+        Deps->MutIdx = CurMutIdx;
+      }
+    }
+  }
+
+  // Now that we have assigned buckets to each value, merge the transitive
+  // dependency sets of all values belonging to the same set.
+  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>> Buckets(
+      Bucketing.size());
+  for (auto [Deps, Bucket] : llvm::zip(Bucketing, Buckets)) {
+    for (TransitiveDeps *Dep : Deps) {
+      for (auto &NamedValue : Dep->Deps)
+        Bucket.insert(NamedValue);
+    }
+  }
+
+  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned> *>
+      SetsToProcess;
+  SetsToProcess.reserve(Buckets.size() + TransDeps.size());
+
+  // Clone each mutable global bucket into its own module.
+  for (auto &Bucket : Buckets) {
+    if (Bucket.empty())
+      continue;
+    SetsToProcess.push_back(&Bucket);
+  }
+
+  for (auto &[Root, Deps] : TransDeps) {
+    // Skip values included in another transitive dependency set and values
+    // included in mutable global sets.
+    if (!Deps.MutIdx)
+      SetsToProcess.push_back(&Deps.Deps);
+  }
+
+  if (SetsToProcess.size() <= 1)
+    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
+                     /*numFunctionBase=*/0);
+
+  // Sort the sets by size to schedule the larger modules first.
+  llvm::sort(SetsToProcess,
+             [](auto *Lhs, auto *Rhs) { return Lhs->size() > Rhs->size(); });
+
+  // Prepare to materialize slices of the module by first writing the main
+  // module as bitcode to a shared buffer.
+  std::string BufStr;
+  llvm::raw_string_ostream BufOS(BufStr);
+  {
+    llvm::Module &Module = Strtab->externalizeStrings(std::move(MainModule));
+    llvm::WriteBitcodeToFile(Module, BufOS);
+  }
+
+  auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(BufStr.size());
+  memcpy(Buf->getBufferStart(), BufStr.c_str(), BufStr.size());
+
+  unsigned NumFunctions = 0;
+  for (auto [Idx, Set] : llvm::enumerate(SetsToProcess)) {
+    unsigned Next = NumFunctions + Set->size();
+    auto MakeModule =
+        [Set = std::move(*Set),
+         Buf = MemoryBufferRef((*Buf).MemoryBuffer::getBuffer(), ""),
+         Strtab = Strtab]() mutable {
+          return readAndMaterializeDependencies(Buf, Set, *Strtab);
+        };
+    ProcessFn(std::move(MakeModule), Idx, NumFunctions);
+    NumFunctions = Next;
+  }
+}
+
+void LLVMModuleSplitterImpl::collectImmediateDependencies(
+    const llvm::Value *Value, const llvm::GlobalValue *Orig) {
+  for (const llvm::Value *User : Value->users()) {
+    // Recurse into pure constant users.
+    if (isa<llvm::Constant>(User) && !isa<llvm::GlobalValue>(User)) {
+      collectImmediateDependencies(User, Orig);
+      continue;
+    }
+
+    if (auto *Inst = dyn_cast<llvm::Instruction>(User)) {
+      const llvm::Function *Func = Inst->getParent()->getParent();
+      Infos[Func].Dependencies.push_back(Orig); // Enclosing function uses Orig.
+    } else if (auto *GlobalVal = dyn_cast<llvm::GlobalValue>(User)) {
+      Infos[GlobalVal].Dependencies.push_back(Orig); // Global uses Orig.
+    } else {
+      llvm_unreachable("unexpected user of global value");
+    }
+  }
+}
+
+namespace {
+/// This class provides support for splitting an LLVM module into multiple
+/// parts.
+/// TODO: Clean up the splitters here (some code duplication) when we can move
+/// to per function llvm compilation.
+class LLVMModulePerFunctionSplitterImpl {
+public:
+  explicit LLVMModulePerFunctionSplitterImpl(LLVMModuleAndContext Module)
+      : MainModule(std::move(Module)) {}
+
+  /// Split the LLVM module into multiple modules using the provided process
+  /// function.
+  void
+  split(LLVMSplitProcessFn ProcessFn,
+        llvm::StringMap<llvm::GlobalValue::LinkageTypes> &SymbolLinkageTypes,
+        unsigned NumFunctionBase);
+
+private:
+  struct ValueInfo {
+    const llvm::Value *Value = nullptr;
+    bool CanBeSplit = true; // Cleared for mutable globals and their users.
+    llvm::SmallPtrSet<const llvm::GlobalValue *, 4> Dependencies;
+    llvm::SmallPtrSet<const llvm::GlobalValue *, 4> Users;
+    /// The index of this global value in the module. We will use this to
+    /// materialize global values from bitcode.
+    unsigned GvIdx;
+    bool UserEmpty = true; // No users other than (possibly) itself.
+  };
+
+  /// Collect all of the immediate global value users of `Value`.
+  void collectValueUsers(const llvm::GlobalValue *Value);
+
+  /// Propagate use information through the module.
+  void propagateUseInfo();
+
+  /// The main LLVM module being split.
+  LLVMModuleAndContext MainModule;
+
+  /// The value info for each global value in the module.
+  llvm::MapVector<const llvm::GlobalValue *, ValueInfo> ValueInfos;
+};
+} // namespace
+
+/// Split an LLVM module into multiple parts, with each part containing only
+/// one function (with an exception for coroutine related functions).
+void splitPerFunction(
+    LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
+    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &SymbolLinkageTypes,
+    unsigned NumFunctionBase) {
+  LLVMModulePerFunctionSplitterImpl Impl(std::move(Module));
+  Impl.split(ProcessFn, SymbolLinkageTypes, NumFunctionBase);
+}
+
+/// Split the LLVM module into multiple modules using the provided process
+/// function.
+void LLVMModulePerFunctionSplitterImpl::split(
+    LLVMSplitProcessFn ProcessFn,
+    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &SymbolLinkageTypes,
+    unsigned NumFunctionBase) {
+  // Compute the value info for each global in the module.
+  // NOTE: The visitation of globals then functions has to line up with
+  // `readAndMaterializeDependencies`.
+  IntrusiveRefCntPtr<StringConstantTable> Strtab(new StringConstantTable());
+  unsigned GvIdx = 0;
+  auto ComputeUsers = [&](const llvm::GlobalValue &Value) {
+    Strtab->recordIfStringConstant(GvIdx, Value);
+    ValueInfos[&Value].GvIdx = GvIdx++;
+    collectValueUsers(&Value);
+  };
+  llvm::for_each(MainModule->globals(), ComputeUsers);
+  llvm::for_each(MainModule->functions(), ComputeUsers);
+
+  // With use information collected, propagate it to the dependencies.
+  propagateUseInfo();
+
+  // Now we can split the module.
+  // We split the module per function, cloning any necessary dependencies:
+  // - For function dependencies, only clone the declaration unless it's
+  //   coroutine related.
+  // - For other internal values, clone as is.
+  // This is much more fine-grained splitting, which enables significantly
+  // higher levels of parallelism (and smaller generated artifacts).
+  // LLVM LTO style optimization may suffer a bit here since we don't have
+  // the full callstack present anymore in each cloned module.
+  llvm::DenseSet<const llvm::Value *> SplitValues;
+  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>>
+      SetsToProcess;
+
+  // Hoist these collections to re-use memory allocations.
+  llvm::ValueToValueMapTy ValueMap;
+  SmallPtrSet<const llvm::Value *, 4> SplitDeps;
+  auto SplitValue = [&](const llvm::GlobalValue *Root) {
+    // If the function is already split, e.g. if it was a dependency of
+    // another function, skip it.
+    if (SplitValues.count(Root))
+      return;
+
+    auto &ValueInfo = ValueInfos[Root];
+    ValueMap.clear();
+    SplitDeps.clear();
+    auto ShouldSplit = [&](const llvm::GlobalValue *GlobalVal,
+                           const struct ValueInfo &Info) {
+      // Only clone root and the declaration of its dependencies.
+      if (GlobalVal == Root) {
+        SplitDeps.insert(GlobalVal);
+        return true;
+      }
+
+      if ((Info.CanBeSplit || Info.UserEmpty) &&
+          isa_and_nonnull<llvm::Function>(GlobalVal))
+        return false;
+
+      if (ValueInfo.Dependencies.contains(GlobalVal)) {
+        SplitDeps.insert(GlobalVal);
+        return true;
+      }
+
+      return false;
+    };
+
+    auto &Set = SetsToProcess.emplace_back();
+    for (auto &[GlobalVal, Info] : ValueInfos) {
+      if (ShouldSplit(GlobalVal, Info))
+        Set.insert({GlobalVal, Info.GvIdx});
+    }
+    if (Set.empty())
+      SetsToProcess.pop_back();
+
+    // Record the split values.
+    SplitValues.insert(SplitDeps.begin(), SplitDeps.end());
+  };
+
+  [[maybe_unused]] int64_t Count = 0;
+  SmallVector<const llvm::GlobalValue *> ToSplit;
+  unsigned UnnamedGlobal = NumFunctionBase;
+
+  for (auto &Global : MainModule->globals()) {
+    if (Global.hasInternalLinkage() || Global.hasPrivateLinkage()) {
+      if (!Global.hasName()) {
+        // Give each unnamed GlobalVariable a unique name so that MCLink does
+        // not get confused when naming them while generating linked code,
+        // since the IR values can differ between splits (for the X86 backend).
+        // ASan builds insert these unnamed GlobalVariables.
+        Global.setName("__llvm_split_unnamed" + Twine(UnnamedGlobal++));
+      }
+
+      SymbolLinkageTypes.insert({Global.getName().str(), Global.getLinkage()});
+      Global.setLinkage(llvm::GlobalValue::WeakAnyLinkage);
+      continue;
+    }
+
+    if (Global.hasExternalLinkage())
+      continue;
+
+    // TODO: Add special handling for `llvm.global_ctors` and
+    // `llvm.global_dtors`, because otherwise they end up tying almost all
+    // symbols into the same split.
+    LLVM_DEBUG(llvm::dbgs()
+                   << (Count++) << ": split global: " << Global << "\n";);
+    ToSplit.emplace_back(&Global);
+  }
+
+  for (auto &Fn : MainModule->functions()) {
+    if (Fn.isDeclaration())
+      continue;
+
+    ValueInfo &Info = ValueInfos[&Fn];
+    if (Fn.hasInternalLinkage() || Fn.hasPrivateLinkage()) {
+      // Avoid renaming when linking in MCLink.
+      SymbolLinkageTypes.insert({Fn.getName().str(), Fn.getLinkage()});
+      Fn.setLinkage(llvm::Function::LinkageTypes::WeakAnyLinkage);
+    }
+
+    if (Info.CanBeSplit || Info.UserEmpty) {
+      LLVM_DEBUG(llvm::dbgs()
+                     << (Count++) << ": split fn: " << Fn.getName() << "\n";);
+      ToSplit.emplace_back(&Fn);
+    }
+  }
+
+  // Run this now since we just changed the linkages.
+  for (const llvm::GlobalValue *Value : ToSplit)
+    SplitValue(Value);
+
+  if (SetsToProcess.size() <= 1)
+    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
+                     NumFunctionBase);
+
+  // Prepare to materialize slices of the module by first writing the main
+  // module as bitcode to a shared buffer.
+  std::string BufStr;
+  llvm::raw_string_ostream BufOS(BufStr);
+  {
+    llvm::Module &Module = Strtab->externalizeStrings(std::move(MainModule));
+    llvm::WriteBitcodeToFile(Module, BufOS);
+  }
+
+  auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(BufStr.size());
+  memcpy(Buf->getBufferStart(), BufStr.c_str(), BufStr.size());
+  unsigned NumFunctions = 0;
+  for (auto [Idx, Set] : llvm::enumerate(SetsToProcess)) {
+    unsigned Next = NumFunctions + Set.size();
+    // Giving each function a unique ID across all splits for proper MC level
+    // linking and codegen into one object file where duplicated functions
+    // in each split will be deduplicated (with the linking).
+    auto MakeModule =
+        [Set = std::move(Set),
+         Buf = MemoryBufferRef((*Buf).MemoryBuffer::getBuffer(), ""),
+         Strtab = Strtab]() mutable {
+          return readAndMaterializeDependencies(Buf, Set, *Strtab);
+        };
+    ProcessFn(std::move(MakeModule), Idx, NumFunctions);
+    NumFunctions = Next;
+  }
+}
+
+/// Collect all of the immediate global value users of `Value`.
+void LLVMModulePerFunctionSplitterImpl::collectValueUsers(
+    const llvm::GlobalValue *Value) {
+  SmallVector<const llvm::User *> Worklist(Value->users());
+
+  while (!Worklist.empty()) {
+    const llvm::User *UserIt = Worklist.pop_back_val();
+
+    // Look through pure constant users by queueing their own users.
+    if (isa<llvm::Constant>(UserIt) && !isa<llvm::GlobalValue>(UserIt)) {
+      Worklist.append(UserIt->user_begin(), UserIt->user_end());
+      continue;
+    }
+
+    if (const auto *Inst = dyn_cast<llvm::Instruction>(UserIt)) {
+      const llvm::Function *Func = Inst->getParent()->getParent();
+      ValueInfos[Value].Users.insert(Func);
+      ValueInfos[Func]; // Make sure the user has an entry of its own.
+    } else if (const auto *GlobalVal = dyn_cast<llvm::GlobalValue>(UserIt)) {
+      ValueInfos[Value].Users.insert(GlobalVal);
+      ValueInfos[GlobalVal]; // Make sure the user has an entry of its own.
+    } else {
+      llvm_unreachable("unexpected user of global value");
+    }
+  }
+
+  // If the current value is a mutable global variable, then it can't be
+  // split.
+  if (auto *Global = dyn_cast<llvm::GlobalVariable>(Value))
+    ValueInfos[Value].CanBeSplit = Global->isConstant();
+}
+
+/// Propagate use information through the module.
+void LLVMModulePerFunctionSplitterImpl::propagateUseInfo() {
+  std::vector<ValueInfo *> Worklist;
+
+  // Each value depends on itself. Seed the iteration with that.
+  for (auto &[Value, Info] : ValueInfos) {
+    if (auto Func = llvm::dyn_cast<llvm::Function>(Value)) {
+      if (Func->isDeclaration())
+        continue;
+    }
+
+    Info.Dependencies.insert(Value);
+    Info.Value = Value;
+    Worklist.push_back(&Info);
+    if (!Info.CanBeSplit) {
+      // If a value cannot be split, its users are also its dependencies.
+      llvm::set_union(Info.Dependencies, Info.Users);
+    }
+  }
+
+  while (!Worklist.empty()) {
+    ValueInfo *Info = Worklist.back();
+    Worklist.pop_back();
+
+    // Propagate the dependencies of this value to its users.
+    for (const llvm::GlobalValue *User : Info->Users) {
+      ValueInfo &UserInfo = ValueInfos.find(User)->second;
+      if (Info == &UserInfo)
+        continue;
+      bool Changed = false;
+
+      // Merge dependency to user if current value is not a function that will
+      // be split into a separate module.
+      bool MergeToUserDep = true;
+      if (llvm::isa_and_nonnull<llvm::Function>(Info->Value)) {
+        MergeToUserDep = !Info->CanBeSplit;
+      }
+
+      // If there is a change, add the user info to the worklist.
+      if (MergeToUserDep) {
+        if (llvm::set_union(UserInfo.Dependencies, Info->Dependencies))
+          Changed = true;
+      }
+
+      // If the value cannot be split, its users cannot be split either.
+      if (!Info->CanBeSplit && UserInfo.CanBeSplit) {
+        UserInfo.CanBeSplit = false;
+        Changed = true;
+        // If a value cannot be split, its users are also its dependencies.
+        llvm::set_union(UserInfo.Dependencies, UserInfo.Users);
+      }
+
+      if (Changed) {
+        UserInfo.Value = User;
+        Worklist.push_back(&UserInfo);
+      }
+    }
+
+    if (Info->CanBeSplit || isa_and_nonnull<llvm::GlobalValue>(Info->Value))
+      continue;
+
+    // If a value cannot be split, propagate its dependencies up to them.
+    // NOTE(review): `Info->Value` is always a GlobalValue; is this reachable?
+    for (const llvm::GlobalValue *Dep : Info->Dependencies) {
+      ValueInfo &DepInfo = ValueInfos.find(Dep)->second;
+      if (Info == &DepInfo)
+        continue;
+      if (llvm::set_union(DepInfo.Dependencies, Info->Dependencies)) {
+        DepInfo.Value = Dep;
+        Worklist.push_back(&DepInfo);
+      }
+    }
+  }
+
+  // A value whose only user is itself counts as having no users.
+  for (auto &[Value, Info] : ValueInfos) {
+    Info.UserEmpty = Info.Users.empty() ||
+                     (Info.Users.size() == 1 && Info.Users.contains(Value));
+  }
+}

>From c7d9d9ecb5b7f033b160ac7f5bb17009dedb9ba5 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Fri, 14 Mar 2025 16:49:44 -0400
Subject: [PATCH 06/17] update source.

---
 llvm/lib/ModuleSplitter/ModuleSplitter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/ModuleSplitter/ModuleSplitter.cpp b/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
index 68e4a12766d73..65c4da1aaa7d6 100644
--- a/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
+++ b/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
@@ -8,7 +8,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/ModuleSplitter.h"
+#include "llvm/ModuleSplitter/ModuleSplitter.h"
 
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"

>From a3075f22bc4a9d535ef13f7d2500560c45197c64 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Fri, 14 Mar 2025 16:50:43 -0400
Subject: [PATCH 07/17] Add llvm-module-splitter skeleton and bazel
 integration.

---
 .../tools/llvm-module-splitter/CMakeLists.txt |  11 ++
 .../llvm-module-splitter.cpp                  | 101 ++++++++++++++++++
 .../llvm-project-overlay/llvm/BUILD.bazel     |  33 ++++++
 3 files changed, 145 insertions(+)
 create mode 100644 llvm/tools/llvm-module-splitter/CMakeLists.txt
 create mode 100644 llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp

diff --git a/llvm/tools/llvm-module-splitter/CMakeLists.txt b/llvm/tools/llvm-module-splitter/CMakeLists.txt
new file mode 100644
index 0000000000000..30ba638e4ffb8
--- /dev/null
+++ b/llvm/tools/llvm-module-splitter/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(LLVM_LINK_COMPONENTS
+  Core
+  IRReader
+  LLVMModuleSplitter
+  Support
+)
+
+add_llvm_tool(llvm-module-splitter
+  llvm-module-splitter.cpp
+
+)
diff --git a/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp b/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp
new file mode 100644
index 0000000000000..8e719dbcba9f9
--- /dev/null
+++ b/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp
@@ -0,0 +1,101 @@
+
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/ModuleSplitter/ModuleSplitter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <utility>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Module Splitter
+//===----------------------------------------------------------------------===//
+
+/// Parses the IR file `Name` into `Context`.  On failure the diagnostic is
+/// printed to stderr, prefixed with this tool's name, and null is returned.
+static std::unique_ptr<Module> readModule(LLVMContext &Context,
+                                          StringRef Name) {
+  SMDiagnostic Diag;
+  std::unique_ptr<Module> M = parseIRFile(Name, Diag, Context);
+  if (!M)
+    Diag.print("llvm-module-splitter", errs());
+  return M;
+}
+
+int main(int argc, char **argv) {
+
+  // Parse the command line options registered with llvm::cl.
+  llvm::cl::ParseCommandLineOptions(argc, argv);
+
+  LLVMModuleAndContext Module;
+  return 0; // Skeleton only: everything below is still commented out.
+  //ErrorOrSuccess err = module.create(
+  //    [&](LLVMContext &ctx) -> M::ErrorOr<std::unique_ptr<Module>> {
+  //      if (std::unique_ptr<Module> module =
+  //              readModule(ctx, clOptions.inputFilename))
+  //        return module;
+  //      return M::Error("could not load LLVM file");
+  //    });
+  //if (err) {
+  //  llvm::errs() << err.getError() << "\n";
+  //  return -1;
+  //}
+
+  //std::unique_ptr<llvm::ToolOutputFile> output = nullptr;
+  //if (clOptions.outputPrefix == "-") {
+  //  std::error_code error;
+  //  output = std::make_unique<llvm::ToolOutputFile>(
+  //      clOptions.outputPrefix, error, llvm::sys::fs::OF_None);
+  //  if (error)
+  //    exit(clOptions.options.reportError("Cannot open output file: '" +
+  //                                       clOptions.outputPrefix +
+  //                                       "':" + error.message()));
+  //}
+
+  //auto outputLambda =
+  //    [&](llvm::unique_function<LLVMModuleAndContext()> produceModule,
+  //        std::optional<int64_t> idx, unsigned numFunctionsBase) mutable {
+  //      LLVMModuleAndContext subModule = produceModule();
+  //      if (clOptions.outputPrefix == "-") {
+  //        output->os() << "##############################################\n";
+  //        if (idx)
+  //          output->os() << "# [LLVM Module Split: submodule " << *idx << "]\n";
+  //        else
+  //          output->os() << "# [LLVM Module Split: main module]\n";
+  //        output->os() << "##############################################\n";
+  //        output->os() << *subModule;
+  //        output->os() << "\n";
+  //      } else {
+  //        std::string outPath;
+  //        if (!idx) {
+  //          outPath = clOptions.outputPrefix + ".ll";
+  //        } else {
+  //          outPath =
+  //              (clOptions.outputPrefix + "." + Twine(*idx) + ".ll").str();
+  //        }
+  //        auto outFile = mlir::openOutputFile(outPath);
+  //        if (!outFile) {
+  //          exit(clOptions.options.reportError("Cannot open output file: '" +
+  //                                             outPath + "."));
+  //        }
+  //        outFile->os() << *subModule;
+  //        outFile->keep();
+  //        llvm::outs() << "Write llvm module to " << outPath << "\n";
+  //      }
+  //    };
+
+  //llvm::StringMap<llvm::GlobalValue::LinkageTypes> symbolLinkageTypes;
+  //if (clOptions.perFunctionSplit)
+  //  splitPerFunction(std::move(module), outputLambda, symbolLinkageTypes);
+  //else
+  //  splitPerExported(std::move(module), outputLambda);
+
+  //if (output)
+  //  output->keep();
+  //return 0;
+}
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index ef80af43c216b..6204af588fc0a 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -2085,6 +2085,24 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "ModuleSplitter",
+    srcs = glob([
+        "lib/ModuleSplitter/*.cpp",
+    ]),
+    hdrs = glob([
+        "include/llvm/ModuleSplitter/*.h",
+    ]),
+    copts = llvm_copts,
+    deps = [
+        ":BitReader",
+        ":BitWriter",
+        ":Core",
+        ":IRReader",
+        ":Support",
+    ],
+)
+
 llvm_target_lib_list = [lib for lib in [
     {
         "name": "AArch64",
@@ -5303,6 +5321,21 @@ cc_library(
     ],
 )
 
+cc_binary(
+    name = "llvm-module-splitter",
+    srcs = glob([
+        "tools/llvm-module-splitter/*.cpp",
+    ]),
+    copts = llvm_copts,
+    stamp = 0,
+    deps = [
+        ":Core",
+        ":IRReader",
+        ":ModuleSplitter",
+        ":Support",
+    ],
+)
+
 llvm_driver_cc_binary(
     name = "llvm-objcopy",
     stamp = 0,

>From fb12509514935633ab1318e620174450243b6746 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Fri, 14 Mar 2025 16:50:57 -0400
Subject: [PATCH 08/17] Moving source file around.

---
 llvm/include/llvm/IR/ModuleSplitter.h      |  78 --
 llvm/include/llvm/Support/ModuleSplitter.h |  78 --
 llvm/lib/CMakeLists.txt                    |   1 +
 llvm/lib/IR/ModuleSplitter.cpp             | 815 --------------------
 llvm/lib/Support/CMakeLists.txt            |   1 -
 llvm/lib/Support/ModuleSplitter.cpp        | 825 ---------------------
 6 files changed, 1 insertion(+), 1797 deletions(-)
 delete mode 100644 llvm/include/llvm/IR/ModuleSplitter.h
 delete mode 100644 llvm/include/llvm/Support/ModuleSplitter.h
 delete mode 100644 llvm/lib/IR/ModuleSplitter.cpp
 delete mode 100644 llvm/lib/Support/ModuleSplitter.cpp

diff --git a/llvm/include/llvm/IR/ModuleSplitter.h b/llvm/include/llvm/IR/ModuleSplitter.h
deleted file mode 100644
index 912d8edb7c189..0000000000000
--- a/llvm/include/llvm/IR/ModuleSplitter.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===- ModuleSplitter.h - Module Splitter Functions -------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_MODULESPLITTER_H
-#define LLVM_SUPPORT_MODULESPLITTER_H
-
-#include "llvm/ADT/FunctionExtras.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Error.h"
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-// LLVMModuleAndContext
-//===----------------------------------------------------------------------===//
-
-/// A pair of an LLVM module and the LLVM context that holds ownership of the
-/// objects. This is a useful class for parallelizing LLVM and managing
-/// ownership of LLVM instances.
-class LLVMModuleAndContext {
-public:
-  /// Expose the underlying LLVM context to create the module. This is the only
-  /// way to access the LLVM context to prevent accidental sharing.
-  Expected<bool> create(
-      function_ref<Expected<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
-          CreateModule);
-
-  llvm::Module &operator*() { return *Module; }
-  llvm::Module *operator->() { return Module.get(); }
-
-  void reset();
-
-private:
-  /// LLVM context stored in a unique pointer so that we can move this type.
-  std::unique_ptr<llvm::LLVMContext> Ctx =
-      std::make_unique<llvm::LLVMContext>();
-  /// The paired LLVM module.
-  std::unique_ptr<llvm::Module> Module;
-};
-
-//===----------------------------------------------------------------------===//
-// Module Splitter
-//===----------------------------------------------------------------------===//
-
-using LLVMSplitProcessFn =
-    function_ref<void(llvm::unique_function<LLVMModuleAndContext()>,
-                      std::optional<int64_t>, unsigned)>;
-
-/// Helper to create a lambda that just forwards a preexisting Module.
-inline llvm::unique_function<LLVMModuleAndContext()>
-forwardModule(LLVMModuleAndContext &&Module) {
-  return [Module = std::move(Module)]() mutable { return std::move(Module); };
-}
-
-/// Support for splitting an LLVM module into multiple parts using anchored
-/// functions (e.g. exported functions), and pull in all dependency on the
-// call stack into one module.
-void splitPerAnchored(LLVMModuleAndContext Module,
-                      LLVMSplitProcessFn ProcessFn,
-                      llvm::SmallVectorImpl<llvm::Function>& Anchors);
-
-/// Support for splitting an LLVM module into multiple parts with each part
-/// contains only one function.
-void splitPerFunction(
-    LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn);
-
-} // namespace llvm
-
-#endif
diff --git a/llvm/include/llvm/Support/ModuleSplitter.h b/llvm/include/llvm/Support/ModuleSplitter.h
deleted file mode 100644
index 912d8edb7c189..0000000000000
--- a/llvm/include/llvm/Support/ModuleSplitter.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===- ModuleSplitter.h - Module Splitter Functions -------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_MODULESPLITTER_H
-#define LLVM_SUPPORT_MODULESPLITTER_H
-
-#include "llvm/ADT/FunctionExtras.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Error.h"
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-// LLVMModuleAndContext
-//===----------------------------------------------------------------------===//
-
-/// A pair of an LLVM module and the LLVM context that holds ownership of the
-/// objects. This is a useful class for parallelizing LLVM and managing
-/// ownership of LLVM instances.
-class LLVMModuleAndContext {
-public:
-  /// Expose the underlying LLVM context to create the module. This is the only
-  /// way to access the LLVM context to prevent accidental sharing.
-  Expected<bool> create(
-      function_ref<Expected<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
-          CreateModule);
-
-  llvm::Module &operator*() { return *Module; }
-  llvm::Module *operator->() { return Module.get(); }
-
-  void reset();
-
-private:
-  /// LLVM context stored in a unique pointer so that we can move this type.
-  std::unique_ptr<llvm::LLVMContext> Ctx =
-      std::make_unique<llvm::LLVMContext>();
-  /// The paired LLVM module.
-  std::unique_ptr<llvm::Module> Module;
-};
-
-//===----------------------------------------------------------------------===//
-// Module Splitter
-//===----------------------------------------------------------------------===//
-
-using LLVMSplitProcessFn =
-    function_ref<void(llvm::unique_function<LLVMModuleAndContext()>,
-                      std::optional<int64_t>, unsigned)>;
-
-/// Helper to create a lambda that just forwards a preexisting Module.
-inline llvm::unique_function<LLVMModuleAndContext()>
-forwardModule(LLVMModuleAndContext &&Module) {
-  return [Module = std::move(Module)]() mutable { return std::move(Module); };
-}
-
-/// Support for splitting an LLVM module into multiple parts using anchored
-/// functions (e.g. exported functions), and pull in all dependency on the
-// call stack into one module.
-void splitPerAnchored(LLVMModuleAndContext Module,
-                      LLVMSplitProcessFn ProcessFn,
-                      llvm::SmallVectorImpl<llvm::Function>& Anchors);
-
-/// Support for splitting an LLVM module into multiple parts with each part
-/// contains only one function.
-void splitPerFunction(
-    LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn);
-
-} // namespace llvm
-
-#endif
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
index f6465612d30c0..2201fcda0a7fd 100644
--- a/llvm/lib/CMakeLists.txt
+++ b/llvm/lib/CMakeLists.txt
@@ -24,6 +24,7 @@ add_subdirectory(Analysis)
 add_subdirectory(LTO)
 add_subdirectory(MC)
 add_subdirectory(MCA)
+add_subdirectory(ModuleSplitter)
 add_subdirectory(ObjCopy)
 add_subdirectory(Object)
 add_subdirectory(ObjectYAML)
diff --git a/llvm/lib/IR/ModuleSplitter.cpp b/llvm/lib/IR/ModuleSplitter.cpp
deleted file mode 100644
index 1778c0d4a2278..0000000000000
--- a/llvm/lib/IR/ModuleSplitter.cpp
+++ /dev/null
@@ -1,815 +0,0 @@
-//===--- ModuleSplitter.cpp - Module Splitter -------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/ModuleSplitter.h"
-
-#include "mlir/Support/LLVM.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/Transforms/Utils/SplitModule.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-
-using namespace llvm;
-#define DEBUG_TYPE "llvm-module-split"
-
-//===----------------------------------------------------------------------===//
-// LLVMModuleAndContext
-//===----------------------------------------------------------------------===//
-
-Expected<bool> LLVMModuleAndContext::create(
-    function_ref<Expected<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
-        CreateModule) {
-  assert(!Module && "already have a module");
-  auto ModuleOr = CreateModule(*Ctx);
-  if (Error Err = ModuleOr.takeError())
-    return Err;
-
-  Module = std::move(*ModuleOr);
-  return true;
-}
-
-void LLVMModuleAndContext::reset() {
-  Module.reset();
-  Ctx.reset();
-}
-
-//===----------------------------------------------------------------------===//
-// StringConstantTable
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// Large strings are very inefficiently encoded in LLVM bitcode (each `char` is
-/// encoded as a `uint64_t`). The LLVM bitcode reader is also very inefficiently
-/// reads strings back, performing 3 ultimate copies of the data. This is made
-/// worse by the fact the `getLazyBitcodeModule` does not lazily parse constants
-/// from the LLVM bitcode. Thus, when per-function splitting a module with N
-/// functions and M large string constants, we form 3*M*N copies of the large
-/// strings.
-///
-/// This class is part of a workaround of this inefficiency. When processing a
-/// module for splitting, we track any string global constants and their indices
-/// in this table. If a module is going to be roundtripped through bitcode to be
-/// lazily loaded, we externalize the strings by setting the corresponding
-/// constants to `zeroinitializer` in the module before it is written to
-/// bitcode. As we materialize constants on the other side, we check for a
-/// materialized global variable that matches an entry in the string table and
-/// directly copy the data over into the new LLVM context.
-///
-/// We can generalize this optimization to other large data types as necessary.
-///
-/// This class is used in an `RCRef` to be shared across multiple threads.
-class StringConstantTable
-    : public ThreadSafeRefCountedBase<StringConstantTable> {
-  /// An entry in the string table consists of a global variable, its module
-  /// index, and the a reference to the string data. Because the string data is
-  /// owned by the original LLVM context, we have to ensure it stays alive.
-  struct Entry {
-    unsigned Idx;
-    const llvm::GlobalVariable *Var;
-    StringRef Value;
-  };
-
-public:
-  /// If `Value` denotes a string constant, record the data at index `GvIdx`.
-  void recordIfStringConstant(unsigned GvIdx, const llvm::GlobalValue &Value) {
-    auto Var = dyn_cast<llvm::GlobalVariable>(&Value);
-    if (Var && Var->isConstant() && Var->hasInternalLinkage()) {
-      auto *Init =
-          dyn_cast<llvm::ConstantDataSequential>(Var->getInitializer());
-      if (Init && Init->isCString())
-        StringConstants.push_back(Entry{GvIdx, Var, Init->getAsString()});
-    }
-  }
-
-  /// Before writing the main Module to bitcode, externalize large string
-  /// constants by stubbing out their values. Take ownership of the main Module
-  /// so the string data stays alive.
-  llvm::Module &externalizeStrings(LLVMModuleAndContext &&Module) {
-    MainModule = std::move(Module);
-    // Stub the initializers. The global variable is an internal constant, so it
-    // must have an initializer.
-    for (Entry &E : StringConstants) {
-      auto *Stub =
-          llvm::Constant::getNullValue(E.Var->getInitializer()->getType());
-      // `const_cast` is OK because we own the module now.
-      const_cast<llvm::GlobalVariable *>(E.Var)->setInitializer(Stub);
-    }
-    return *MainModule;
-  }
-
-  /// This is an iterator over the entries in the string table.
-  class Injector {
-    using const_iterator = std::vector<Entry>::const_iterator;
-
-  public:
-    /// Given a global variable in a materialized module and its index, if it is
-    /// a string constant found in the table, copy the data over into the new
-    /// LLVM context and set the initializer.
-    void materializeIfStringConstant(unsigned GvIdx,
-                                     llvm::GlobalVariable &Var) {
-      while (It != Et && It->Idx < GvIdx)
-        ++It;
-      if (It == Et || It->Idx != GvIdx)
-        return;
-      Var.setInitializer(llvm::ConstantDataArray::getString(
-          Var.getType()->getContext(), It->Value, /*AddNull=*/false));
-    }
-
-  private:
-    explicit Injector(const_iterator It, const_iterator Et) : It(It), Et(Et) {}
-
-    const_iterator It, Et;
-
-    friend class StringConstantTable;
-  };
-
-  Injector begin() const {
-    return Injector(StringConstants.begin(), StringConstants.end());
-  }
-
-private:
-  std::vector<Entry> StringConstants;
-  LLVMModuleAndContext MainModule;
-};
-
-//===----------------------------------------------------------------------===//
-// Module Splitter
-//===----------------------------------------------------------------------===//
-
-class LLVMModuleSplitterImpl {
-public:
-  explicit LLVMModuleSplitterImpl(LLVMModuleAndContext Module)
-      : MainModule(std::move(Module)) {}
-
-  /// Split the LLVM module into multiple modules using the provided process
-  /// function.
-  void split(LLVMSplitProcessFn ProcessFn,
-             llvm::SmallVectorImpl<llvm::Function> &Anchors);
-
-private:
-  struct ValueInfo {
-    /// The immediate global value dependencies of a value.
-    SmallVector<const llvm::GlobalValue *> Dependencies;
-    /// Map each global value to its index in the module. We will use this to
-    /// materialize global values from bitcode.
-    unsigned GvIdx;
-  };
-
-  struct TransitiveDeps {
-    /// The transitive dependencies.
-    llvm::MapVector<const llvm::GlobalValue *, unsigned> Deps;
-    /// True if computation is complete.
-    bool Complete = false;
-    /// The assigned module index.
-    std::optional<unsigned> MutIdx;
-  };
-
-  /// Collect the immediate global value dependencies of `Value`. `Orig` is the
-  /// original transitive value, which is not equal to `Value` when it is used
-  /// in a constant.
-  void collectImmediateDependencies(const llvm::Value *Value,
-                                    const llvm::GlobalValue *Orig);
-
-  /// The main LLVM module being split.
-  LLVMModuleAndContext MainModule;
-
-  /// The value info for each global value in the module.
-  llvm::DenseMap<const llvm::Value *, ValueInfo> Infos;
-
-  /// The transitive dependencies of each global value.
-  llvm::MapVector<const llvm::GlobalValue *, TransitiveDeps> TransDeps;
-
-  /// Users of split "anchors". These are global values where we don't want
-  /// their users to be split into different modules because it will cause the
-  /// symbol to be duplicated.
-  llvm::MapVector<const llvm::GlobalValue *, llvm::SetVector<TransitiveDeps *>>
-      SplitAnchorUsers;
-};
-} // namespace
-
-static LLVMModuleAndContext readAndMaterializeDependencies(
-    MemoryBufferRef &Buf,
-    const llvm::MapVector<const llvm::GlobalValue *, unsigned> &Set,
-    const StringConstantTable &Strtab) {
-
-  // First, create a lazy module with an internal bitcode materializer.
-  // TODO: Not sure how to make lazy loading metadata work.
-  LLVMModuleAndContext Result;
-  {
-    (void)Result.create(
-        [&](llvm::LLVMContext &Ctx) -> Expected<std::unique_ptr<Module>> {
-          return llvm::cantFail(
-              llvm::getLazyBitcodeModule(Buf, Ctx,
-                                         /*ShouldLazyLoadMetadata=*/false));
-        });
-    Result->setModuleInlineAsm("");
-  }
-
-  SmallVector<unsigned> SortIndices =
-      llvm::to_vector(llvm::make_second_range(Set));
-  llvm::sort(SortIndices, std::less<unsigned>());
-  auto* IdxIt = SortIndices.begin();
-  auto* IdxEnd = SortIndices.end();
-
-  // The global value indices go from globals, functions, then aliases. This
-  // mirrors the order in which global values are deleted by LLVM's GlobalDCE.
-  unsigned CurIdx = 0;
-  StringConstantTable::Injector It = Strtab.begin();
-  // We need to keep the IR "valid" for the verifier because `materializeAll`
-  // may invoke it. It doesn't matter since we're deleting the globals anyway.
-  for (llvm::GlobalVariable &Global : Result->globals()) {
-    if (IdxIt != IdxEnd && CurIdx == *IdxIt) {
-      ++IdxIt;
-      llvm::cantFail(Global.materialize());
-      It.materializeIfStringConstant(CurIdx, Global);
-    } else {
-      Global.setInitializer(nullptr);
-      Global.setComdat(nullptr);
-      Global.setLinkage(llvm::GlobalValue::ExternalLinkage);
-      // External link should not be DSOLocal anymore,
-      // otherwise position independent code generates
-      // `R_X86_64_PC32` instead of `R_X86_64_REX_GOTPCRELX`
-      // for these symbols and building shared library from
-      // a static archive of this module will error with an `fPIC` confusion.
-      Global.setDSOLocal(false);
-    }
-    ++CurIdx;
-  }
-  for (llvm::Function &Func : Result->functions()) {
-    if (IdxIt != IdxEnd && CurIdx == *IdxIt) {
-      ++IdxIt;
-      llvm::cantFail(Func.materialize());
-    } else {
-      Func.deleteBody();
-      Func.setComdat(nullptr);
-      Func.setLinkage(llvm::GlobalValue::ExternalLinkage);
-      // External link should not be DSOLocal anymore,
-      // otherwise position independent code generates
-      // `R_X86_64_PC32` instead of `R_X86_64_REX_GOTPCRELX`
-      // for these symbols and building shared library from
-      // a static archive of this module will error with an `fPIC` confusion.
-      // External link should not be DSOLocal anymore,
-      // otherwise position independent code generation get confused.
-      Func.setDSOLocal(false);
-    }
-    ++CurIdx;
-  }
-
-  // Finalize materialization of the module.
-  llvm::cantFail(Result->materializeAll());
-
-  // Now that the module is materialized, we can start deleting stuff. Just
-  // delete declarations with no uses.
-  for (llvm::GlobalVariable &Global :
-       llvm::make_early_inc_range(Result->globals())) {
-    if (Global.isDeclaration() && Global.use_empty())
-      Global.eraseFromParent();
-  }
-  for (llvm::Function &Func : llvm::make_early_inc_range(Result->functions())) {
-    if (Func.isDeclaration() && Func.use_empty())
-      Func.eraseFromParent();
-  }
-  return Result;
-}
-
-/// support for splitting an LLVM module into multiple parts using exported
-/// functions as anchors, and pull in all dependency on the call stack into one
-/// module.
-void splitPerAnchored(LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
-                      llvm::SmallVectorImpl<llvm::Function> &Anchors) {
-  LLVMModuleSplitterImpl Impl(std::move(Module));
-  Impl.split(ProcessFn, Anchors);
-}
-
-void LLVMModuleSplitterImpl::split(
-    LLVMSplitProcessFn ProcessFn,
-    llvm::SmallVectorImpl<llvm::Function> &Anchors) {
-  // The use-def list is sparse. Use it to build a sparse dependency graph
-  // between global values.
-  IntrusiveRefCntPtr<StringConstantTable> Strtab(new StringConstantTable());
-  unsigned GvIdx = 0;
-
-  auto ComputeDeps = [&](const llvm::GlobalValue &value) {
-    Strtab->recordIfStringConstant(GvIdx, value);
-    Infos[&value].GvIdx = GvIdx++;
-    collectImmediateDependencies(&value, &value);
-  };
-  // NOTE: The visitation of globals then functions has to line up with
-  // `readAndMaterializeDependencies`.
-  for (const llvm::GlobalVariable &global : MainModule->globals()) {
-    ComputeDeps(global);
-    if (!global.hasInternalLinkage() && !global.hasPrivateLinkage())
-      TransDeps[&global];
-  }
-  for (const llvm::Function &Fn : MainModule->functions()) {
-    ComputeDeps(Fn);
-    if (!Fn.isDeclaration() && (Fn.hasExternalLinkage() || Fn.hasWeakLinkage()))
-      TransDeps[&Fn];
-  }
-
-  // If there is only one (or fewer) exported functions, forward the main
-  // module.
-  if (TransDeps.size() <= 1)
-    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
-                     /*numFunctionBase=*/0);
-
-  // Now for each export'd global value, compute the transitive set of
-  // dependencies using DFS.
-  SmallVector<const llvm::GlobalValue *> Worklist;
-  for (auto &[Value, Deps] : TransDeps) {
-    Worklist.clear();
-    Worklist.push_back(Value);
-    while (!Worklist.empty()) {
-      const llvm::GlobalValue *It = Worklist.pop_back_val();
-
-      auto [iter, inserted] = Deps.Deps.insert({It, -1});
-      if (!inserted) {
-        // Already visited.
-        continue;
-      }
-      // Pay the cost of the name lookup only on a miss.
-      const ValueInfo &Info = Infos.at(It);
-      iter->second = Info.GvIdx;
-
-      // If this value depends on another value that is going to be split, we
-      // don't want to duplicate the symbol. Keep all the users together.
-      if (It != Value) {
-        if (auto* DepIt = TransDeps.find(It);
-            DepIt != TransDeps.end()) {
-          auto &Users = SplitAnchorUsers[It];
-          Users.insert(&Deps);
-          // Make sure to include the other value in its own user list.
-          Users.insert(&DepIt->second);
-          // We don't have to recurse since the subgraph will get processed.
-          continue;
-        }
-      }
-
-      // If this value depends on a mutable global, keep track of it. We have to
-      // put all users of a mutable global in the same module.
-      if (auto *Global = dyn_cast<llvm::GlobalVariable>(It);
-          Global && !Global->isConstant())
-        SplitAnchorUsers[Global].insert(&Deps);
-
-      // Recursive on dependencies.
-      llvm::append_range(Worklist, Info.Dependencies);
-    }
-
-    Deps.Complete = true;
-  }
-
-  // For each mutable global, grab all the transitive users and put them in one
-  // module. If global A has user set A* and global B has user set B* where
-  // A* and B* have an empty intersection, all values in A* will be assigned 0
-  // and all values in B* will be assigned 1. If global C has user set C* that
-  // overlaps both A* and B*, it will overwrite both to 2.
-  SmallVector<SmallVector<TransitiveDeps *>> Bucketing(SplitAnchorUsers.size());
-  for (auto [CurMutIdx, Bucket, Users] :
-       llvm::enumerate(Bucketing, llvm::make_second_range(SplitAnchorUsers))) {
-    for (TransitiveDeps *Deps : Users) {
-      if (Deps->MutIdx && *Deps->MutIdx != CurMutIdx) {
-        auto &OtherBucket = Bucketing[*Deps->MutIdx];
-        for (TransitiveDeps *Other : OtherBucket) {
-          Bucket.push_back(Other);
-          Other->MutIdx = CurMutIdx;
-        }
-        OtherBucket.clear();
-        assert(*Deps->MutIdx == CurMutIdx);
-      } else {
-        Bucket.push_back(Deps);
-        Deps->MutIdx = CurMutIdx;
-      }
-    }
-  }
-
-  // Now that we have assigned buckets to each value, merge the transitive
-  // dependency sets of all values belonging to the same set.
-  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>> Buckets(
-      Bucketing.size());
-  for (auto [Deps, Bucket] : llvm::zip(Bucketing, Buckets)) {
-    for (TransitiveDeps *Dep : Deps) {
-      for (auto &NamedValue : Dep->Deps)
-        Bucket.insert(NamedValue);
-    }
-  }
-
-  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned> *>
-      SetsToProcess;
-  SetsToProcess.reserve(Buckets.size() + TransDeps.size());
-
-  // Clone each mutable global bucket into its own module.
-  for (auto &Bucket : Buckets) {
-    if (Bucket.empty())
-      continue;
-    SetsToProcess.push_back(&Bucket);
-  }
-
-  for (auto &[Root, Deps] : TransDeps) {
-    // Skip values included in another transitive dependency set and values
-    // included in mutable global sets.
-    if (!Deps.MutIdx)
-      SetsToProcess.push_back(&Deps.Deps);
-  }
-
-  if (SetsToProcess.size() <= 1)
-    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
-                     /*numFunctionBase=*/0);
-
-  // Sort the sets by to schedule the larger modules first.
-  llvm::sort(SetsToProcess,
-             [](auto *Lhs, auto *Rhs) { return Lhs->size() > Rhs->size(); });
-
-  // Prepare to materialize slices of the module by first writing the main
-  // module as bitcode to a shared buffer.
-  std::string BufStr;
-  llvm::raw_string_ostream BufOS(BufStr);
-  {
-    llvm::Module &Module = Strtab->externalizeStrings(std::move(MainModule));
-    llvm::WriteBitcodeToFile(Module, BufOS);
-  }
-
-  auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(BufStr.size());
-  memcpy(Buf->getBufferStart(), BufStr.c_str(), BufStr.size());
-
-  unsigned NumFunctions = 0;
-  for (auto [Idx, Set] : llvm::enumerate(SetsToProcess)) {
-    unsigned Next = NumFunctions + Set->size();
-    auto MakeModule =
-        [Set = std::move(*Set),
-         Buf = MemoryBufferRef((*Buf).MemoryBuffer::getBuffer(), ""),
-         Strtab = Strtab]() mutable {
-          return readAndMaterializeDependencies(Buf, Set, *Strtab);
-        };
-    ProcessFn(std::move(MakeModule), Idx, NumFunctions);
-    NumFunctions = Next;
-  }
-}
-
-void LLVMModuleSplitterImpl::collectImmediateDependencies(
-    const llvm::Value *Value, const llvm::GlobalValue *Orig) {
-  for (const llvm::Value *User : Value->users()) {
-    // Recurse into pure constant users.
-    if (isa<llvm::Constant>(User) && !isa<llvm::GlobalValue>(User)) {
-      collectImmediateDependencies(User, Orig);
-      continue;
-    }
-
-    if (auto *Inst = dyn_cast<llvm::Instruction>(User)) {
-      const llvm::Function *Func = Inst->getParent()->getParent();
-      Infos[Func].Dependencies.push_back(Orig);
-    } else if (auto *GlobalVal = dyn_cast<llvm::GlobalValue>(User)) {
-      Infos[GlobalVal].Dependencies.push_back(Orig);
-    } else {
-      llvm_unreachable("unexpected user of global value");
-    }
-  }
-}
-
-namespace {
-/// This class provides support for splitting an LLVM module into multiple
-/// parts.
-/// TODO: Clean up the splitters here (some code duplication) when we can move
-/// to per function llvm compilation.
-class LLVMModulePerFunctionSplitterImpl {
-public:
-  LLVMModulePerFunctionSplitterImpl(LLVMModuleAndContext Module)
-      : mainModule(std::move(Module)) {}
-
-  /// Split the LLVM module into multiple modules using the provided process
-  /// function.
-  void
-  split(LLVMSplitProcessFn ProcessFn,
-        llvm::StringMap<llvm::GlobalValue::LinkageTypes> &SymbolLinkageTypes,
-        unsigned NumFunctionBase);
-
-private:
-  struct ValueInfo {
-    const llvm::Value *Value = nullptr;
-    bool CanBeSplit = true;
-    llvm::SmallPtrSet<const llvm::GlobalValue *, 4> Dependencies;
-    llvm::SmallPtrSet<const llvm::GlobalValue *, 4> Users;
-    /// Map each global value to its index in the module. We will use this to
-    /// materialize global values from bitcode.
-    unsigned GvIdx;
-    bool UserEmpty = true;
-  };
-
-  /// Collect all of the immediate global value users of `value`.
-  void collectValueUsers(const llvm::GlobalValue *Value);
-
-  /// Propagate use information through the module.
-  void propagateUseInfo();
-
-  /// The main LLVM module being split.
-  LLVMModuleAndContext MainModule;
-
-  /// The value info for each global value in the module.
-  llvm::MapVector<const llvm::GlobalValue *, ValueInfo> ValueInfos;
-};
-} // namespace
-
-/// support for splitting an LLVM module into multiple parts with each part
-/// contains only one function (with exception for coroutine related functions.)
-void splitPerFunction(
-    LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
-    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &SymbolLinkageTypes,
-    unsigned NumFunctionBase) {
-  LLVMModulePerFunctionSplitterImpl Impl(std::move(Module));
-  Impl.split(ProcessFn, SymbolLinkageTypes, NumFunctionBase);
-}
-
-/// Split the LLVM module into multiple modules using the provided process
-/// function.
-void LLVMModulePerFunctionSplitterImpl::split(
-    LLVMSplitProcessFn ProcessFn,
-    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &SymbolLinkageTypes,
-    unsigned NumFunctionBase) {
-  // Compute the value info for each global in the module.
-  // NOTE: The visitation of globals then functions has to line up with
-  // `readAndMaterializeDependencies`.
-  IntrusiveRefCntPtr<StringConstantTable> Strtab(new StringConstantTable());
-  unsigned GvIdx = 0;
-  auto ComputeUsers = [&](const llvm::GlobalValue &Value) {
-    Strtab->recordIfStringConstant(GvIdx, Value);
-    ValueInfos[&Value].GvIdx = GvIdx++;
-    collectValueUsers(&Value);
-  };
-  llvm::for_each(MainModule->globals(), ComputeUsers);
-  llvm::for_each(MainModule->functions(), ComputeUsers);
-
-  // With use information collected, propagate it to the dependencies.
-  propagateUseInfo();
-
-  // Now we can split the module.
-  // We split the module per function and cloning any necessary dependencies:
-  // - For function dependencies, only clone the declaration unless its
-  //   coroutine related.
-  // - For other internal values, clone as is.
-  // This is much fine-grained splitting, which enables significantly higher
-  // levels of parallelism (and smaller generated artifacts).
-  // LLVM LTO style optimization may suffer a bit here since we don't have
-  // the full callstack present anymore in each cloned module.
-  llvm::DenseSet<const llvm::Value *> SplitValues;
-  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>>
-      SetsToProcess;
-
-  // Hoist these collections to re-use memory allocations.
-  llvm::ValueToValueMapTy ValueMap;
-  SmallPtrSet<const llvm::Value *, 4> SplitDeps;
-  auto SplitValue = [&](const llvm::GlobalValue *Root) {
-    // If the function is already split, e.g. if it was a dependency of
-    // another function, skip it.
-    if (SplitValues.count(Root))
-      return;
-
-    auto &ValueInfo = ValueInfos[Root];
-    ValueMap.clear();
-    SplitDeps.clear();
-    auto ShouldSplit = [&](const llvm::GlobalValue *GlobalVal,
-                           const struct ValueInfo &Info) {
-      // Only clone root and the declaration of its dependencies.
-      if (GlobalVal == Root) {
-        SplitDeps.insert(GlobalVal);
-        return true;
-      }
-
-      if ((Info.CanBeSplit || Info.UserEmpty) &&
-          isa_and_nonnull<llvm::Function>(GlobalVal))
-        return false;
-
-      if (ValueInfo.Dependencies.contains(GlobalVal)) {
-        SplitDeps.insert(GlobalVal);
-        return true;
-      }
-
-      return false;
-    };
-
-    auto &Set = SetsToProcess.emplace_back();
-    for (auto &[GlobalVal, Info] : ValueInfos) {
-      if (ShouldSplit(GlobalVal, Info))
-        Set.insert({GlobalVal, Info.GvIdx});
-    }
-    if (Set.empty())
-      SetsToProcess.pop_back();
-
-    // Record the split values.
-    SplitValues.insert(SplitDeps.begin(), SplitDeps.end());
-  };
-
-  [[maybe_unused]] int64_t Count = 0;
-  SmallVector<const llvm::GlobalValue *> ToSplit;
-  unsigned UnnamedGlobal = NumFunctionBase;
-
-  for (auto &Global : MainModule->globals()) {
-    if (Global.hasInternalLinkage() || Global.hasPrivateLinkage()) {
-      if (!Global.hasName()) {
-        // Give unnamed GlobalVariable a unique name so that MCLink will not get
-        // confused to name them while generating linked code since the IR
-        // values can be different in each splits (for X86 backend.)
-        // asan build inserts these unnamed GlobalVariables.
-        Global.setName("__llvm_split_unnamed" + Twine(UnnamedGlobal++));
-      }
-
-      SymbolLinkageTypes.insert({Global.getName().str(), Global.getLinkage()});
-      Global.setLinkage(llvm::GlobalValue::WeakAnyLinkage);
-      continue;
-    }
-
-    if (Global.hasExternalLinkage())
-      continue;
-
-    // TODO: Add special handling for `llvm.global_ctors` and
-    // `llvm.global_dtors`, because otherwise they end up tying almost all
-    // symbols into the same split.
-    LLVM_DEBUG(llvm::dbgs()
-                   << (Count++) << ": split global: " << Global << "\n";);
-    ToSplit.emplace_back(&Global);
-  }
-
-  for (auto &Fn : MainModule->functions()) {
-    if (Fn.isDeclaration())
-      continue;
-
-    ValueInfo &Info = ValueInfos[&Fn];
-    if (Fn.hasInternalLinkage() || Fn.hasPrivateLinkage()) {
-      // Avoid renaming when linking in MCLink.
-      SymbolLinkageTypes.insert({Fn.getName().str(), Fn.getLinkage()});
-      Fn.setLinkage(llvm::Function::LinkageTypes::WeakAnyLinkage);
-    }
-
-    if (Info.CanBeSplit || Info.UserEmpty) {
-      LLVM_DEBUG(llvm::dbgs()
-                     << (Count++) << ": split fn: " << Fn.getName() << "\n";);
-      ToSplit.emplace_back(&Fn);
-    }
-  }
-
-  // Run this now since we just changed the linkages.
-  for (const llvm::GlobalValue *Value : ToSplit)
-    SplitValue(Value);
-
-  if (SetsToProcess.size() <= 1)
-    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
-                     NumFunctionBase);
-
-  // Prepare to materialize slices of the module by first writing the main
-  // module as bitcode to a shared buffer.
-  std::string BufStr;
-  llvm::raw_string_ostream BufOS(BufStr);
-  {
-    llvm::Module &Module = Strtab->externalizeStrings(std::move(MainModule));
-    llvm::WriteBitcodeToFile(Module, BufOS);
-  }
-
-  auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(BufStr.size());
-  memcpy(Buf->getBufferStart(), BufStr.c_str(), BufStr.size());
-  unsigned NumFunctions = 0;
-  for (auto [Idx, Set] : llvm::enumerate(SetsToProcess)) {
-    unsigned Next = NumFunctions + Set.size();
-    // Giving each function a unique ID across all splits for proper MC level
-    // linking and codegen into one object file where duplicated functions
-    // in each split will be deduplicated (with the linking).
-    auto MakeModule =
-        [Set = std::move(Set),
-         Buf = MemoryBufferRef((*Buf).MemoryBuffer::getBuffer(), ""),
-         Strtab = Strtab]() mutable {
-          return readAndMaterializeDependencies(Buf, Set, *Strtab);
-        };
-    ProcessFn(std::move(MakeModule), Idx, NumFunctions);
-    NumFunctions = Next;
-  }
-}
-
-/// Collect all of the immediate global value users of `value`.
-void LLVMModulePerFunctionSplitterImpl::collectValueUsers(
-    const llvm::GlobalValue *Value) {
-  SmallVector<const llvm::User *> Worklist(Value->users());
-
-  while (!Worklist.empty()) {
-    const llvm::User *UserIt = Worklist.pop_back_val();
-
-    // Recurse into pure constant users.
-    if (isa<llvm::Constant>(UserIt) && !isa<llvm::GlobalValue>(UserIt)) {
-      Worklist.append(UserIt->user_begin(), UserIt->user_end());
-      continue;
-    }
-
-    if (const auto *Inst = dyn_cast<llvm::Instruction>(UserIt)) {
-      const llvm::Function *Func = Inst->getParent()->getParent();
-      ValueInfos[Value].Users.insert(Func);
-      ValueInfos[Func];
-    } else if (const auto *GlobalVal = dyn_cast<llvm::GlobalValue>(UserIt)) {
-      ValueInfos[Value].Users.insert(GlobalVal);
-      ValueInfos[GlobalVal];
-    } else {
-      llvm_unreachable("unexpected user of global value");
-    }
-  }
-
-  // If the current value is a mutable global variable, then it can't be
-  // split.
-  if (auto *Global = dyn_cast<llvm::GlobalVariable>(Value))
-    ValueInfos[Value].CanBeSplit = Global->isConstant();
-}
-
-/// Propagate use information through the module.
-void LLVMModulePerFunctionSplitterImpl::propagateUseInfo() {
-  std::vector<ValueInfo *> Worklist;
-
-  // Each value depends on itself. Seed the iteration with that.
-  for (auto &[Value, Info] : ValueInfos) {
-    if (auto Func = llvm::dyn_cast<llvm::Function>(Value)) {
-      if (Func->isDeclaration())
-        continue;
-    }
-
-    Info.Dependencies.insert(Value);
-    Info.Value = Value;
-    Worklist.push_back(&Info);
-    if (!Info.CanBeSplit) {
-      // If a value cannot be split, its users are also its dependencies.
-      llvm::set_union(Info.Dependencies, Info.Users);
-    }
-  }
-
-  while (!Worklist.empty()) {
-    ValueInfo *Info = Worklist.back();
-    Worklist.pop_back();
-
-    // Propagate the dependencies of this value to its users.
-    for (const llvm::GlobalValue *User : Info->Users) {
-      ValueInfo &UserInfo = ValueInfos.find(User)->second;
-      if (Info == &UserInfo)
-        continue;
-      bool Changed = false;
-
-      // Merge dependency to user if current value is not a function that will
-      // be split into a separate module.
-      bool MergeToUserDep = true;
-      if (llvm::isa_and_nonnull<llvm::Function>(Info->Value)) {
-        MergeToUserDep = !Info->CanBeSplit;
-      }
-
-      // If there is a change, add the user info to the worklist.
-      if (MergeToUserDep) {
-        if (llvm::set_union(UserInfo.Dependencies, Info->Dependencies))
-          Changed = true;
-      }
-
-      // If the value cannot be split, its users cannot be split either.
-      if (!Info->CanBeSplit && UserInfo.CanBeSplit) {
-        UserInfo.CanBeSplit = false;
-        Changed = true;
-        // If a value cannot be split, its users are also its dependencies.
-        llvm::set_union(UserInfo.Dependencies, UserInfo.Users);
-      }
-
-      if (Changed) {
-        UserInfo.Value = User;
-        Worklist.push_back(&UserInfo);
-      }
-    }
-
-    if (Info->CanBeSplit || isa_and_nonnull<llvm::GlobalValue>(Info->Value))
-      continue;
-
-    // If a value cannot be split, propagate its dependencies up to its
-    // dependencies.
-    for (const llvm::GlobalValue *Dep : Info->Dependencies) {
-      ValueInfo &DepInfo = ValueInfos.find(Dep)->second;
-      if (Info == &DepInfo)
-        continue;
-      if (llvm::set_union(DepInfo.Dependencies, Info->Dependencies)) {
-        DepInfo.Value = Dep;
-        Worklist.push_back(&DepInfo);
-      }
-    }
-  }
-
-  for (auto &[Value, Info] : ValueInfos) {
-    Info.UserEmpty = Info.Users.empty() ||
-                     (Info.Users.size() == 1 && Info.Users.contains(Value));
-  }
-}
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 8cd5a79f252bd..49a26a618de83 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -217,7 +217,6 @@ add_llvm_component_library(LLVMSupport
   MemoryBuffer.cpp
   MemoryBufferRef.cpp
   ModRef.cpp
-  ModuleSplitter.cpp
   MD5.cpp
   MSP430Attributes.cpp
   MSP430AttributeParser.cpp
diff --git a/llvm/lib/Support/ModuleSplitter.cpp b/llvm/lib/Support/ModuleSplitter.cpp
deleted file mode 100644
index 1fc45f415e3ac..0000000000000
--- a/llvm/lib/Support/ModuleSplitter.cpp
+++ /dev/null
@@ -1,825 +0,0 @@
-//===--- ModuleSplitter.cpp - Module Splitter -------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/ModuleSplitter.h"
-
-#include "mlir/Support/LLVM.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/Transforms/Utils/SplitModule.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-
-using namespace llvm;
-#define DEBUG_TYPE "llvm-module-split"
-
-//===----------------------------------------------------------------------===//
-// LLVMModuleAndContext
-//===----------------------------------------------------------------------===//
-
-Expected<bool> LLVMModuleAndContext::create(
-    function_ref<Expected<std::unique_ptr<llvm::Module>>(llvm::LLVMContext &)>
-        CreateModule) {
-  assert(!Module && "already have a module");
-  auto ModuleOr = CreateModule(*Ctx);
-  if (Error Err = ModuleOr.takeError())
-    return Err;
-
-  Module = std::move(*ModuleOr);
-  return true;
-}
-
-void LLVMModuleAndContext::reset() {
-  Module.reset();
-  Ctx.reset();
-}
-
-//===----------------------------------------------------------------------===//
-// StringConstantTable
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// Large strings are very inefficiently encoded in LLVM bitcode (each `char` is
-/// encoded as a `uint64_t`). The LLVM bitcode reader is also very inefficiently
-/// reads strings back, performing 3 ultimate copies of the data. This is made
-/// worse by the fact the `getLazyBitcodeModule` does not lazily parse constants
-/// from the LLVM bitcode. Thus, when per-function splitting a module with N
-/// functions and M large string constants, we form 3*M*N copies of the large
-/// strings.
-///
-/// This class is part of a workaround of this inefficiency. When processing a
-/// module for splitting, we track any string global constants and their indices
-/// in this table. If a module is going to be roundtripped through bitcode to be
-/// lazily loaded, we externalize the strings by setting the corresponding
-/// constants to `zeroinitializer` in the module before it is written to
-/// bitcode. As we materialize constants on the other side, we check for a
-/// materialized global variable that matches an entry in the string table and
-/// directly copy the data over into the new LLVM context.
-///
-/// We can generalize this optimization to other large data types as necessary.
-///
-/// This class is used in an `RCRef` to be shared across multiple threads.
-class StringConstantTable
-    : public ThreadSafeRefCountedBase<StringConstantTable> {
-  /// An entry in the string table consists of a global variable, its module
-  /// index, and the a reference to the string data. Because the string data is
-  /// owned by the original LLVM context, we have to ensure it stays alive.
-  struct Entry {
-    unsigned Idx;
-    const llvm::GlobalVariable *Var;
-    StringRef Value;
-  };
-
-public:
-  /// If `Value` denotes a string constant, record the data at index `GvIdx`.
-  void recordIfStringConstant(unsigned GvIdx, const llvm::GlobalValue &Value) {
-    auto Var = dyn_cast<llvm::GlobalVariable>(&Value);
-    if (Var && Var->isConstant() && Var->hasInternalLinkage()) {
-      auto *Init =
-          dyn_cast<llvm::ConstantDataSequential>(Var->getInitializer());
-      if (Init && Init->isCString())
-        StringConstants.push_back(Entry{GvIdx, Var, Init->getAsString()});
-    }
-  }
-
-  /// Before writing the main Module to bitcode, externalize large string
-  /// constants by stubbing out their values. Take ownership of the main Module
-  /// so the string data stays alive.
-  llvm::Module &externalizeStrings(LLVMModuleAndContext &&Module) {
-    MainModule = std::move(Module);
-    // Stub the initializers. The global variable is an internal constant, so it
-    // must have an initializer.
-    for (Entry &E : StringConstants) {
-      auto *Stub =
-          llvm::Constant::getNullValue(E.Var->getInitializer()->getType());
-      // `const_cast` is OK because we own the module now.
-      const_cast<llvm::GlobalVariable *>(E.Var)->setInitializer(Stub);
-    }
-    return *MainModule;
-  }
-
-  /// This is an iterator over the entries in the string table.
-  class Injector {
-    using const_iterator = std::vector<Entry>::const_iterator;
-
-  public:
-    /// Given a global variable in a materialized module and its index, if it is
-    /// a string constant found in the table, copy the data over into the new
-    /// LLVM context and set the initializer.
-    void materializeIfStringConstant(unsigned GvIdx,
-                                     llvm::GlobalVariable &Var) {
-      while (It != Et && It->Idx < GvIdx)
-        ++It;
-      if (It == Et || It->Idx != GvIdx)
-        return;
-      Var.setInitializer(llvm::ConstantDataArray::getString(
-          Var.getType()->getContext(), It->Value, /*AddNull=*/false));
-    }
-
-  private:
-    explicit Injector(const_iterator It, const_iterator Et) : It(It), Et(Et) {}
-
-    const_iterator It, Et;
-
-    friend class StringConstantTable;
-  };
-
-  Injector begin() const {
-    return Injector(StringConstants.begin(), StringConstants.end());
-  }
-
-private:
-  std::vector<Entry> StringConstants;
-  LLVMModuleAndContext MainModule;
-};
-
-//===----------------------------------------------------------------------===//
-// Module Splitter
-//===----------------------------------------------------------------------===//
-
-class LLVMModuleSplitterImpl {
-public:
-  explicit LLVMModuleSplitterImpl(LLVMModuleAndContext Module)
-      : MainModule(std::move(Module)) {}
-
-  /// Split the LLVM module into multiple modules using the provided process
-  /// function.
-  void split(LLVMSplitProcessFn ProcessFn,
-             llvm::SmallVectorImpl<llvm::Function> &Anchors);
-
-private:
-  struct ValueInfo {
-    /// The immediate global value dependencies of a value.
-    SmallVector<const llvm::GlobalValue *> Dependencies;
-    /// Map each global value to its index in the module. We will use this to
-    /// materialize global values from bitcode.
-    unsigned GvIdx;
-  };
-
-  struct TransitiveDeps {
-    /// The transitive dependencies.
-    llvm::MapVector<const llvm::GlobalValue *, unsigned> Deps;
-    /// True if computation is complete.
-    bool Complete = false;
-    /// The assigned module index.
-    std::optional<unsigned> MutIdx;
-  };
-
-  /// Collect the immediate global value dependencies of `Value`. `Orig` is the
-  /// original transitive value, which is not equal to `Value` when it is used
-  /// in a constant.
-  void collectImmediateDependencies(const llvm::Value *Value,
-                                    const llvm::GlobalValue *Orig);
-
-  /// The main LLVM module being split.
-  LLVMModuleAndContext MainModule;
-
-  /// The value info for each global value in the module.
-  llvm::DenseMap<const llvm::Value *, ValueInfo> Infos;
-
-  /// The transitive dependencies of each global value.
-  llvm::MapVector<const llvm::GlobalValue *, TransitiveDeps> TransDeps;
-
-  /// Users of split "anchors". These are global values where we don't want
-  /// their users to be split into different modules because it will cause the
-  /// symbol to be duplicated.
-  llvm::MapVector<const llvm::GlobalValue *, llvm::SetVector<TransitiveDeps *>>
-      SplitAnchorUsers;
-};
-} // namespace
-
-static LLVMModuleAndContext readAndMaterializeDependencies(
-    WritableMemoryBuffer &Buf,
-    const llvm::MapVector<const llvm::GlobalValue *, unsigned> &Set,
-    const StringConstantTable &Strtab) {
-
-  // First, create a lazy module with an internal bitcode materializer.
-  // TODO: Not sure how to make lazy loading metadata work.
-  LLVMModuleAndContext Result;
-  {
-    (void)Result.create(
-        [&](llvm::LLVMContext &Ctx) -> Expected<std::unique_ptr<Module>> {
-          return llvm::cantFail(llvm::getLazyBitcodeModule(
-              llvm::MemoryBufferRef(Buf.MemoryBuffer::getBuffer(), "<split-module>"), Ctx,
-              /*ShouldLazyLoadMetadata=*/false));
-        });
-    Result->setModuleInlineAsm("");
-  }
-
-  SmallVector<unsigned> SortIndices =
-      llvm::to_vector(llvm::make_second_range(Set));
-  llvm::sort(SortIndices, std::less<unsigned>());
-  auto* IdxIt = SortIndices.begin();
-  auto* IdxEnd = SortIndices.end();
-
-  // The global value indices go from globals, functions, then aliases. This
-  // mirrors the order in which global values are deleted by LLVM's GlobalDCE.
-  unsigned CurIdx = 0;
-  StringConstantTable::Injector It = Strtab.begin();
-  // We need to keep the IR "valid" for the verifier because `materializeAll`
-  // may invoke it. It doesn't matter since we're deleting the globals anyway.
-  for (llvm::GlobalVariable &Global : Result->globals()) {
-    if (IdxIt != IdxEnd && CurIdx == *IdxIt) {
-      ++IdxIt;
-      llvm::cantFail(Global.materialize());
-      It.materializeIfStringConstant(CurIdx, Global);
-    } else {
-      Global.setInitializer(nullptr);
-      Global.setComdat(nullptr);
-      Global.setLinkage(llvm::GlobalValue::ExternalLinkage);
-      // External link should not be DSOLocal anymore,
-      // otherwise position independent code generates
-      // `R_X86_64_PC32` instead of `R_X86_64_REX_GOTPCRELX`
-      // for these symbols and building shared library from
-      // a static archive of this module will error with an `fPIC` confusion.
-      Global.setDSOLocal(false);
-    }
-    ++CurIdx;
-  }
-  for (llvm::Function &Func : Result->functions()) {
-    if (IdxIt != IdxEnd && CurIdx == *IdxIt) {
-      ++IdxIt;
-      llvm::cantFail(Func.materialize());
-    } else {
-      Func.deleteBody();
-      Func.setComdat(nullptr);
-      Func.setLinkage(llvm::GlobalValue::ExternalLinkage);
-      // External link should not be DSOLocal anymore,
-      // otherwise position independent code generates
-      // `R_X86_64_PC32` instead of `R_X86_64_REX_GOTPCRELX`
-      // for these symbols and building shared library from
-      // a static archive of this module will error with an `fPIC` confusion.
-      // External link should not be DSOLocal anymore,
-      // otherwise position independent code generation get confused.
-      Func.setDSOLocal(false);
-    }
-    ++CurIdx;
-  }
-
-  // Finalize materialization of the module.
-  llvm::cantFail(Result->materializeAll());
-
-  // Now that the module is materialized, we can start deleting stuff. Just
-  // delete declarations with no uses.
-  for (llvm::GlobalVariable &Global :
-       llvm::make_early_inc_range(Result->globals())) {
-    if (Global.isDeclaration() && Global.use_empty())
-      Global.eraseFromParent();
-  }
-  for (llvm::Function &Func : llvm::make_early_inc_range(Result->functions())) {
-    if (Func.isDeclaration() && Func.use_empty())
-      Func.eraseFromParent();
-  }
-  return Result;
-}
-
-/// support for splitting an LLVM module into multiple parts using exported
-/// functions as anchors, and pull in all dependency on the call stack into one
-/// module.
-void splitPerAnchored(LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
-                      llvm::SmallVectorImpl<llvm::Function> &Anchors) {
-  LLVMModuleSplitterImpl Impl(std::move(Module));
-  Impl.split(ProcessFn, Anchors);
-}
-
-void LLVMModuleSplitterImpl::split(
-    LLVMSplitProcessFn ProcessFn,
-    llvm::SmallVectorImpl<llvm::Function> &Anchors) {
-  // The use-def list is sparse. Use it to build a sparse dependency graph
-  // between global values.
-  IntrusiveRefCntPtr<StringConstantTable> Strtab(new StringConstantTable());
-  unsigned GvIdx = 0;
-
-  auto ComputeDeps = [&](const llvm::GlobalValue &value) {
-    Strtab->recordIfStringConstant(GvIdx, value);
-    Infos[&value].GvIdx = GvIdx++;
-    collectImmediateDependencies(&value, &value);
-  };
-  // NOTE: The visitation of globals then functions has to line up with
-  // `readAndMaterializeDependencies`.
-  for (const llvm::GlobalVariable &global : MainModule->globals()) {
-    ComputeDeps(global);
-    if (!global.hasInternalLinkage() && !global.hasPrivateLinkage())
-      TransDeps[&global];
-  }
-  for (const llvm::Function &Fn : MainModule->functions()) {
-    ComputeDeps(Fn);
-    if (!Fn.isDeclaration() && (Fn.hasExternalLinkage() || Fn.hasWeakLinkage()))
-      TransDeps[&Fn];
-  }
-
-  // If there is only one (or fewer) exported functions, forward the main
-  // module.
-  if (TransDeps.size() <= 1)
-    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
-                     /*numFunctionBase=*/0);
-
-  // Now for each export'd global value, compute the transitive set of
-  // dependencies using DFS.
-  SmallVector<const llvm::GlobalValue *> Worklist;
-  for (auto &[Value, Deps] : TransDeps) {
-    Worklist.clear();
-    Worklist.push_back(Value);
-    while (!Worklist.empty()) {
-      const llvm::GlobalValue *It = Worklist.pop_back_val();
-
-      auto [iter, inserted] = Deps.Deps.insert({It, -1});
-      if (!inserted) {
-        // Already visited.
-        continue;
-      }
-      // Pay the cost of the name lookup only on a miss.
-      const ValueInfo &Info = Infos.at(It);
-      iter->second = Info.GvIdx;
-
-      // If this value depends on another value that is going to be split, we
-      // don't want to duplicate the symbol. Keep all the users together.
-      if (It != Value) {
-        if (auto* DepIt = TransDeps.find(It);
-            DepIt != TransDeps.end()) {
-          auto &Users = SplitAnchorUsers[It];
-          Users.insert(&Deps);
-          // Make sure to include the other value in its own user list.
-          Users.insert(&DepIt->second);
-          // We don't have to recurse since the subgraph will get processed.
-          continue;
-        }
-      }
-
-      // If this value depends on a mutable global, keep track of it. We have to
-      // put all users of a mutable global in the same module.
-      if (auto *Global = dyn_cast<llvm::GlobalVariable>(It);
-          Global && !Global->isConstant())
-        SplitAnchorUsers[Global].insert(&Deps);
-
-      // Recursive on dependencies.
-      llvm::append_range(Worklist, Info.Dependencies);
-    }
-
-    Deps.Complete = true;
-  }
-
-  // For each mutable global, grab all the transitive users and put them in one
-  // module. If global A has user set A* and global B has user set B* where
-  // A* and B* have an empty intersection, all values in A* will be assigned 0
-  // and all values in B* will be assigned 1. If global C has user set C* that
-  // overlaps both A* and B*, it will overwrite both to 2.
-  SmallVector<SmallVector<TransitiveDeps *>> Bucketing(SplitAnchorUsers.size());
-  for (auto [CurMutIdx, Bucket, Users] :
-       llvm::enumerate(Bucketing, llvm::make_second_range(SplitAnchorUsers))) {
-    for (TransitiveDeps *Deps : Users) {
-      if (Deps->MutIdx && *Deps->MutIdx != CurMutIdx) {
-        auto &OtherBucket = Bucketing[*Deps->MutIdx];
-        for (TransitiveDeps *Other : OtherBucket) {
-          Bucket.push_back(Other);
-          Other->MutIdx = CurMutIdx;
-        }
-        OtherBucket.clear();
-        assert(*Deps->MutIdx == CurMutIdx);
-      } else {
-        Bucket.push_back(Deps);
-        Deps->MutIdx = CurMutIdx;
-      }
-    }
-  }
-
-  // Now that we have assigned buckets to each value, merge the transitive
-  // dependency sets of all values belonging to the same set.
-  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>> Buckets(
-      Bucketing.size());
-  for (auto [Deps, Bucket] : llvm::zip(Bucketing, Buckets)) {
-    for (TransitiveDeps *Dep : Deps) {
-      for (auto &NamedValue : Dep->Deps)
-        Bucket.insert(NamedValue);
-    }
-  }
-
-  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned> *>
-      SetsToProcess;
-  SetsToProcess.reserve(Buckets.size() + TransDeps.size());
-
-  // Clone each mutable global bucket into its own module.
-  for (auto &Bucket : Buckets) {
-    if (Bucket.empty())
-      continue;
-    SetsToProcess.push_back(&Bucket);
-  }
-
-  for (auto &[Root, Deps] : TransDeps) {
-    // Skip values included in another transitive dependency set and values
-    // included in mutable global sets.
-    if (!Deps.MutIdx)
-      SetsToProcess.push_back(&Deps.Deps);
-  }
-
-  if (SetsToProcess.size() <= 1)
-    return ProcessFn(forwardModule(std::move(MainModule)), std::nullopt,
-                     /*numFunctionBase=*/0);
-
-  // Sort the sets by to schedule the larger modules first.
-  llvm::sort(SetsToProcess,
-             [](auto *Lhs, auto *Rhs) { return Lhs->size() > Rhs->size(); });
-
-  // Prepare to materialize slices of the module by first writing the main
-  // module as bitcode to a shared buffer.
-  auto Buf = WritableMemoryBuffer::getNewMemBuffer(size_t Size);
-  {
-    llvm::Module &Module = Strtab->externalizeStrings(std::move(MainModule));
-    llvm::WriteBitcodeToFile(Module, *Buf);
-  }
-
-  unsigned numFunctions = 0;
-  for (auto [idx, set] : llvm::enumerate(setsToProcess)) {
-    unsigned next = numFunctions + set->size();
-    auto makeModule = [set = std::move(*set), buf = BufferRef(buf.copy()),
-                       strtab = strtab.copy()]() mutable {
-      return readAndMaterializeDependencies(std::move(buf), set, *strtab,
-                                            /*ignoreFns=*/{});
-    };
-    processFn(std::move(makeModule), idx, numFunctions);
-    numFunctions = next;
-  }
-}
-
-void LLVMModuleSplitterImpl::collectImmediateDependencies(
-    const llvm::Value *value, const llvm::GlobalValue *orig) {
-  for (const llvm::Value *user : value->users()) {
-    // Recurse into pure constant users.
-    if (isa<llvm::Constant>(user) && !isa<llvm::GlobalValue>(user)) {
-      collectImmediateDependencies(user, orig);
-      continue;
-    }
-
-    if (auto *inst = dyn_cast<llvm::Instruction>(user)) {
-      const llvm::Function *func = inst->getParent()->getParent();
-      infos[func].dependencies.push_back(orig);
-    } else if (auto *globalVal = dyn_cast<llvm::GlobalValue>(user)) {
-      infos[globalVal].dependencies.push_back(orig);
-    } else {
-      llvm_unreachable("unexpected user of global value");
-    }
-  }
-}
-
-namespace {
-/// This class provides support for splitting an LLVM module into multiple
-/// parts.
-/// TODO: Clean up the splitters here (some code duplication) when we can move
-/// to per function llvm compilation.
-class LLVMModulePerFunctionSplitterImpl {
-public:
-  LLVMModulePerFunctionSplitterImpl(LLVMModuleAndContext module)
-      : mainModule(std::move(module)) {}
-
-  /// Split the LLVM module into multiple modules using the provided process
-  /// function.
-  void
-  split(LLVMSplitProcessFn processFn,
-        llvm::StringMap<llvm::GlobalValue::LinkageTypes> &symbolLinkageTypes,
-        unsigned numFunctionBase);
-
-private:
-  struct ValueInfo {
-    const llvm::Value *value = nullptr;
-    bool canBeSplit = true;
-    llvm::SmallPtrSet<const llvm::GlobalValue *, 4> dependencies;
-    llvm::SmallPtrSet<const llvm::GlobalValue *, 4> users;
-    /// Map each global value to its index in the module. We will use this to
-    /// materialize global values from bitcode.
-    unsigned gvIdx;
-    bool userEmpty = true;
-  };
-
-  /// Collect all of the immediate global value users of `value`.
-  void collectValueUsers(const llvm::GlobalValue *value);
-
-  /// Propagate use information through the module.
-  void propagateUseInfo();
-
-  /// The main LLVM module being split.
-  LLVMModuleAndContext mainModule;
-
-  /// The value info for each global value in the module.
-  llvm::MapVector<const llvm::GlobalValue *, ValueInfo> valueInfos;
-};
-} // namespace
-
-static void
-checkDuplicates(llvm::MapVector<const llvm::GlobalValue *, unsigned> &set,
-                llvm::StringSet<> &seenFns, llvm::StringSet<> &dupFns) {
-  for (auto [gv, _] : set) {
-    if (auto fn = dyn_cast<llvm::Function>(gv)) {
-      if (!seenFns.insert(fn->getName()).second) {
-        dupFns.insert(fn->getName());
-      }
-    }
-  }
-}
-
-/// support for splitting an LLVM module into multiple parts with each part
-/// contains only one function (with exception for coroutine related functions.)
-void KGEN::splitPerFunction(
-    LLVMModuleAndContext module, LLVMSplitProcessFn processFn,
-    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &symbolLinkageTypes,
-    unsigned numFunctionBase) {
-  CompilerTimeTraceScope traceScope("splitPerFunction");
-  LLVMModulePerFunctionSplitterImpl impl(std::move(module));
-  impl.split(processFn, symbolLinkageTypes, numFunctionBase);
-}
-
-/// Split the LLVM module into multiple modules using the provided process
-/// function.
-void LLVMModulePerFunctionSplitterImpl::split(
-    LLVMSplitProcessFn processFn,
-    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &symbolLinkageTypes,
-    unsigned numFunctionBase) {
-  // Compute the value info for each global in the module.
-  // NOTE: The visitation of globals then functions has to line up with
-  // `readAndMaterializeDependencies`.
-  auto strtab = RCRef<StringConstantTable>::create();
-  unsigned gvIdx = 0;
-  auto computeUsers = [&](const llvm::GlobalValue &value) {
-    strtab->recordIfStringConstant(gvIdx, value);
-    valueInfos[&value].gvIdx = gvIdx++;
-    collectValueUsers(&value);
-  };
-  llvm::for_each(mainModule->globals(), computeUsers);
-  llvm::for_each(mainModule->functions(), computeUsers);
-
-  // With use information collected, propagate it to the dependencies.
-  propagateUseInfo();
-
-  // Now we can split the module.
-  // We split the module per function and cloning any necessary dependencies:
-  // - For function dependencies, only clone the declaration unless its
-  //   coroutine related.
-  // - For other internal values, clone as is.
-  // This is much fine-grained splitting, which enables significantly higher
-  // levels of parallelism (and smaller generated artifacts).
-  // LLVM LTO style optimization may suffer a bit here since we don't have
-  // the full callstack present anymore in each cloned module.
-  llvm::DenseSet<const llvm::Value *> splitValues;
-  SmallVector<llvm::MapVector<const llvm::GlobalValue *, unsigned>>
-      setsToProcess;
-
-  // Hoist these collections to re-use memory allocations.
-  llvm::ValueToValueMapTy valueMap;
-  SmallPtrSet<const llvm::Value *, 4> splitDeps;
-  auto splitValue = [&](const llvm::GlobalValue *root) {
-    // If the function is already split, e.g. if it was a dependency of
-    // another function, skip it.
-    if (splitValues.count(root))
-      return;
-
-    auto &valueInfo = valueInfos[root];
-    valueMap.clear();
-    splitDeps.clear();
-    auto shouldSplit = [&](const llvm::GlobalValue *globalVal,
-                           const ValueInfo &info) {
-      // Only clone root and the declaration of its dependencies.
-      if (globalVal == root) {
-        splitDeps.insert(globalVal);
-        return true;
-      }
-
-      if ((info.canBeSplit || info.userEmpty) &&
-          isa_and_nonnull<llvm::Function>(globalVal))
-        return false;
-
-      if (valueInfo.dependencies.contains(globalVal)) {
-        splitDeps.insert(globalVal);
-        return true;
-      }
-
-      return false;
-    };
-
-    auto &set = setsToProcess.emplace_back();
-    for (auto &[globalVal, info] : valueInfos) {
-      if (shouldSplit(globalVal, info))
-        set.insert({globalVal, info.gvIdx});
-    }
-    if (set.empty())
-      setsToProcess.pop_back();
-
-    // Record the split values.
-    splitValues.insert(splitDeps.begin(), splitDeps.end());
-  };
-
-  [[maybe_unused]] int64_t count = 0;
-  SmallVector<const llvm::GlobalValue *> toSplit;
-  unsigned unnamedGlobal = numFunctionBase;
-  for (auto &global : mainModule->globals()) {
-    if (global.hasInternalLinkage() || global.hasPrivateLinkage()) {
-      if (!global.hasName()) {
-        // Give unnamed GlobalVariable a unique name so that MCLink will not get
-        // confused to name them while generating linked code since the IR
-        // values can be different in each splits (for X86 backend.)
-        // asan build inserts these unnamed GlobalVariables.
-        global.setName("__mojo_unnamed" + Twine(unnamedGlobal++));
-      }
-
-      symbolLinkageTypes.insert({global.getName().str(), global.getLinkage()});
-      global.setLinkage(llvm::GlobalValue::WeakAnyLinkage);
-      continue;
-    }
-
-    if (global.hasExternalLinkage())
-      continue;
-
-    // TODO: Add special handling for `llvm.global_ctors` and
-    // `llvm.global_dtors`, because otherwise they end up tying almost all
-    // symbols into the same split.
-    LLVM_DEBUG(llvm::dbgs()
-                   << (count++) << ": split global: " << global << "\n";);
-    toSplit.emplace_back(&global);
-  }
-
-  for (auto &fn : mainModule->functions()) {
-    if (fn.isDeclaration())
-      continue;
-
-    ValueInfo &info = valueInfos[&fn];
-    if (fn.hasInternalLinkage() || fn.hasPrivateLinkage()) {
-      // Avoid renaming when linking in MCLink.
-      symbolLinkageTypes.insert({fn.getName().str(), fn.getLinkage()});
-      fn.setLinkage(llvm::Function::LinkageTypes::WeakAnyLinkage);
-    }
-
-    if (info.canBeSplit || info.userEmpty) {
-      LLVM_DEBUG(llvm::dbgs()
-                     << (count++) << ": split fn: " << fn.getName() << "\n";);
-      toSplit.emplace_back(&fn);
-    }
-  }
-
-  // Run this now since we just changed the linkages.
-  for (const llvm::GlobalValue *value : toSplit)
-    splitValue(value);
-
-  if (setsToProcess.size() <= 1)
-    return processFn(forwardModule(std::move(mainModule)), std::nullopt,
-                     numFunctionBase);
-
-  auto duplicatedFns = std::move(mainModule.duplicatedFns);
-
-  // Prepare to materialize slices of the module by first writing the main
-  // module as bitcode to a shared buffer.
-  auto buf = WriteableBuffer::get();
-  {
-    CompilerTimeTraceScope traceScope("writeMainModuleBitcode");
-    llvm::Module &module = strtab->externalizeStrings(std::move(mainModule));
-    llvm::WriteBitcodeToFile(module, *buf);
-  }
-
-  unsigned numFunctions = numFunctionBase;
-  llvm::StringSet<> seenFns;
-  for (auto [idx, set] : llvm::enumerate(setsToProcess)) {
-    // Giving each function a unique ID across all splits for proper MC level
-    // linking and codegen into one object file where duplicated functions
-    // in each split will be deduplicated (with the linking).
-    llvm::StringSet<> currDuplicatedFns = duplicatedFns;
-    checkDuplicates(set, seenFns, currDuplicatedFns);
-
-    unsigned next = numFunctions + set.size();
-    auto makeModule = [set = std::move(set), buf = BufferRef(buf.copy()),
-                       strtab = strtab.copy(), currDuplicatedFns]() mutable {
-      return readAndMaterializeDependencies(std::move(buf), set, *strtab,
-                                            currDuplicatedFns);
-    };
-    processFn(std::move(makeModule), idx, numFunctions);
-    numFunctions = next;
-  }
-}
-
-/// Collect all of the immediate global value users of `value`.
-void LLVMModulePerFunctionSplitterImpl::collectValueUsers(
-    const llvm::GlobalValue *value) {
-  SmallVector<const llvm::User *> worklist(value->users());
-
-  while (!worklist.empty()) {
-    const llvm::User *userIt = worklist.pop_back_val();
-
-    // Recurse into pure constant users.
-    if (isa<llvm::Constant>(userIt) && !isa<llvm::GlobalValue>(userIt)) {
-      worklist.append(userIt->user_begin(), userIt->user_end());
-      continue;
-    }
-
-    if (const auto *inst = dyn_cast<llvm::Instruction>(userIt)) {
-      const llvm::Function *func = inst->getParent()->getParent();
-      valueInfos[value].users.insert(func);
-      valueInfos[func];
-    } else if (const auto *globalVal = dyn_cast<llvm::GlobalValue>(userIt)) {
-      valueInfos[value].users.insert(globalVal);
-      valueInfos[globalVal];
-    } else {
-      llvm_unreachable("unexpected user of global value");
-    }
-  }
-
-  // If the current value is a mutable global variable, then it can't be
-  // split.
-  if (auto *global = dyn_cast<llvm::GlobalVariable>(value))
-    valueInfos[value].canBeSplit = global->isConstant();
-}
-
-/// Propagate use information through the module.
-void LLVMModulePerFunctionSplitterImpl::propagateUseInfo() {
-  std::vector<ValueInfo *> worklist;
-
-  // Each value depends on itself. Seed the iteration with that.
-  for (auto &[value, info] : valueInfos) {
-    if (auto func = llvm::dyn_cast<llvm::Function>(value)) {
-      if (func->isDeclaration())
-        continue;
-    }
-
-    info.dependencies.insert(value);
-    info.value = value;
-    worklist.push_back(&info);
-    if (!info.canBeSplit) {
-      // If a value cannot be split, its users are also its dependencies.
-      llvm::set_union(info.dependencies, info.users);
-    }
-  }
-
-  while (!worklist.empty()) {
-    ValueInfo *info = worklist.back();
-    worklist.pop_back();
-
-    // Propagate the dependencies of this value to its users.
-    for (const llvm::GlobalValue *user : info->users) {
-      ValueInfo &userInfo = valueInfos.find(user)->second;
-      if (info == &userInfo)
-        continue;
-      bool changed = false;
-
-      // Merge dependency to user if current value is not a function that will
-      // be split into a separate module.
-      bool mergeToUserDep = true;
-      if (llvm::isa_and_nonnull<llvm::Function>(info->value)) {
-        mergeToUserDep = !info->canBeSplit;
-      }
-
-      // If there is a change, add the user info to the worklist.
-      if (mergeToUserDep) {
-        if (llvm::set_union(userInfo.dependencies, info->dependencies))
-          changed = true;
-      }
-
-      // If the value cannot be split, its users cannot be split either.
-      if (!info->canBeSplit && userInfo.canBeSplit) {
-        userInfo.canBeSplit = false;
-        changed = true;
-        // If a value cannot be split, its users are also its dependencies.
-        llvm::set_union(userInfo.dependencies, userInfo.users);
-      }
-
-      if (changed) {
-        userInfo.value = user;
-        worklist.push_back(&userInfo);
-      }
-    }
-
-    if (info->canBeSplit || isa_and_nonnull<llvm::GlobalValue>(info->value))
-      continue;
-
-    // If a value cannot be split, propagate its dependencies up to its
-    // dependencies.
-    for (const llvm::GlobalValue *dep : info->dependencies) {
-      ValueInfo &depInfo = valueInfos.find(dep)->second;
-      if (info == &depInfo)
-        continue;
-      if (llvm::set_union(depInfo.dependencies, info->dependencies)) {
-        depInfo.value = dep;
-        worklist.push_back(&depInfo);
-      }
-    }
-  }
-
-  for (auto &[value, info] : valueInfos) {
-    info.userEmpty = info.users.empty() ||
-                     (info.users.size() == 1 && info.users.contains(value));
-  }
-}

>From a8d91734263af13ec5f96d3b4a4608155ab0260c Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Fri, 14 Mar 2025 17:45:55 -0400
Subject: [PATCH 09/17] compiles.

---
 .../llvm/ModuleSplitter/ModuleSplitter.h      |   9 +-
 llvm/lib/ModuleSplitter/ModuleSplitter.cpp    |   9 +-
 .../llvm-module-splitter.cpp                  | 161 +++++++++++-------
 .../llvm-project-overlay/llvm/BUILD.bazel     |   1 +
 4 files changed, 105 insertions(+), 75 deletions(-)

diff --git a/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h b/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h
index 912d8edb7c189..112d85433dec5 100644
--- a/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h
+++ b/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h
@@ -64,14 +64,13 @@ forwardModule(LLVMModuleAndContext &&Module) {
 /// Support for splitting an LLVM module into multiple parts using anchored
 /// functions (e.g. exported functions), and pull in all dependency on the
 // call stack into one module.
-void splitPerAnchored(LLVMModuleAndContext Module,
-                      LLVMSplitProcessFn ProcessFn,
-                      llvm::SmallVectorImpl<llvm::Function>& Anchors);
+void splitPerAnchored(LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
+                      llvm::SmallVectorImpl<llvm::Function> &Anchors);
 
 /// Support for splitting an LLVM module into multiple parts with each part
 /// contains only one function.
-void splitPerFunction(
-    LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn);
+void splitPerFunction(LLVMModuleAndContext Module,
+                      LLVMSplitProcessFn ProcessFn);
 
 } // namespace llvm
 
diff --git a/llvm/lib/ModuleSplitter/ModuleSplitter.cpp b/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
index 65c4da1aaa7d6..6dd5c1b755926 100644
--- a/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
+++ b/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
@@ -228,8 +228,8 @@ static LLVMModuleAndContext readAndMaterializeDependencies(
   SmallVector<unsigned> SortIndices =
       llvm::to_vector(llvm::make_second_range(Set));
   llvm::sort(SortIndices, std::less<unsigned>());
-  auto* IdxIt = SortIndices.begin();
-  auto* IdxEnd = SortIndices.end();
+  auto *IdxIt = SortIndices.begin();
+  auto *IdxEnd = SortIndices.end();
 
   // The global value indices go from globals, functions, then aliases. This
   // mirrors the order in which global values are deleted by LLVM's GlobalDCE.
@@ -354,8 +354,7 @@ void LLVMModuleSplitterImpl::split(
       // If this value depends on another value that is going to be split, we
       // don't want to duplicate the symbol. Keep all the users together.
       if (It != Value) {
-        if (auto* DepIt = TransDeps.find(It);
-            DepIt != TransDeps.end()) {
+        if (auto *DepIt = TransDeps.find(It); DepIt != TransDeps.end()) {
           auto &Users = SplitAnchorUsers[It];
           Users.insert(&Deps);
           // Make sure to include the other value in its own user list.
@@ -493,7 +492,7 @@ namespace {
 class LLVMModulePerFunctionSplitterImpl {
 public:
   LLVMModulePerFunctionSplitterImpl(LLVMModuleAndContext Module)
-      : mainModule(std::move(Module)) {}
+      : MainModule(std::move(Module)) {}
 
   /// Split the LLVM module into multiple modules using the provided process
   /// function.
diff --git a/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp b/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp
index 8e719dbcba9f9..6fbc4e8386c77 100644
--- a/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp
+++ b/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp
@@ -4,6 +4,8 @@
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/ModuleSplitter/ModuleSplitter.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/raw_ostream.h"
@@ -12,6 +14,26 @@
 
 using namespace llvm;
 
+static std::string InputFilename{"-"}; // Backs the positional <input file>.
+static std::string OutputPrefix{"-"};  // Backs -output-prefix.
+static bool PerFunctionSplit = false;  // Backs -per-func.
+
+static llvm::cl::OptionCategory Cat{"Common command line options"};
+
+static cl::opt<std::string, true> InputFilenameOpt{
+    llvm::cl::Positional, llvm::cl::desc("<input file>"),
+    llvm::cl::location(InputFilename), llvm::cl::cat(Cat)};
+
+static cl::opt<std::string, true> OutputPrefixOpt{
+    "output-prefix", llvm::cl::desc("output prefix"),
+    llvm::cl::value_desc("output prefix"), llvm::cl::location(OutputPrefix),
+    llvm::cl::cat(Cat)};
+
+static cl::opt<bool, true> PerFunctionSplitOpt{
+    "per-func", llvm::cl::desc("split each function into separate modules"),
+    llvm::cl::value_desc("split each function into separate modules"),
+    llvm::cl::location(PerFunctionSplit), llvm::cl::cat(Cat)};
+
 //===----------------------------------------------------------------------===//
 // Module Splitter
 //===----------------------------------------------------------------------===//
@@ -32,70 +54,79 @@ int main(int argc, char **argv) {
   // Enable command line options for various MLIR internals.
   llvm::cl::ParseCommandLineOptions(argc, argv);
 
-  LLVMModuleAndContext Module;
+  LLVMModuleAndContext M;
+  return 0;
+  Expected<bool> Err =
+      M.create([&](LLVMContext &Ctx) -> Expected<std::unique_ptr<Module>> {
+        if (std::unique_ptr<Module> m = readModule(Ctx, InputFilename))
+          return m;
+        return make_error<StringError>("could not load LLVM file",
+                                       inconvertibleErrorCode());
+      });
+
+  if (Err) {
+    llvm::errs() << toString(Err.takeError()) << "\n";
+    return -1;
+  }
+
+  std::unique_ptr<llvm::ToolOutputFile> Output = nullptr;
+  if (OutputPrefix == "-") {
+    std::error_code Error;
+    Output = std::make_unique<llvm::ToolOutputFile>(OutputPrefix, Error,
+                                                    llvm::sys::fs::OF_None);
+    if (Error) {
+      llvm::errs() << "Cannot open output file: '" + OutputPrefix +
+                          "':" + Error.message()
+                   << "\n";
+      return -1;
+    }
+  }
+
+  // Emit one split: to the shared Output stream when the prefix is "-",
+  // otherwise to its own file, "<prefix>.ll" or "<prefix>.<Idx>.ll".
+  auto OutputLambda =
+      [&](llvm::unique_function<LLVMModuleAndContext()> ProduceModule,
+          std::optional<int64_t> Idx, unsigned NumFunctionsBase) mutable {
+        LLVMModuleAndContext SubModule = ProduceModule();
+        if (OutputPrefix == "-") {
+          Output->os() << "##############################################\n";
+          if (Idx)
+            Output->os() << "# [LLVM Module Split: submodule " << *Idx << "]\n";
+          else
+            Output->os() << "# [LLVM Module Split: main module]\n";
+          Output->os() << "##############################################\n";
+          Output->os() << *SubModule;
+          Output->os() << "\n";
+        } else {
+          // Idx is empty when the module is forwarded without being split.
+          std::string OutPath = OutputPrefix + ".ll";
+          if (Idx)
+            OutPath = (OutputPrefix + "." + Twine(*Idx) + ".ll").str();
+
+          // Check EC, not OutFile.error(): the constructor reports an open
+          // failure through its error_code argument.
+          std::error_code EC;
+          raw_fd_ostream OutFile(OutPath, EC, llvm::sys::fs::OF_None);
+          if (EC) {
+            llvm::errs() << "Cannot open output file: '" + OutPath +
+                                "':" + EC.message() + "\n";
+            exit(-1);
+          }
+          OutFile << *SubModule;
+          OutFile.close();
+          llvm::outs() << "Write llvm module to " << OutPath << "\n";
+        }
+      };
+
+  llvm::StringMap<llvm::GlobalValue::LinkageTypes> SymbolLinkageTypes;
+  if (PerFunctionSplit)
+    splitPerFunction(std::move(M), OutputLambda);
+  else {
+    SmallVector<llvm::Function> Anchors;
+    splitPerAnchored(std::move(M), OutputLambda, Anchors);
+  }
+
+  if (Output)
+    Output->keep();
   return 0;
-  //ErrorOrSuccess err = module.create(
-  //    [&](LLVMContext &ctx) -> M::ErrorOr<std::unique_ptr<Module>> {
-  //      if (std::unique_ptr<Module> module =
-  //              readModule(ctx, clOptions.inputFilename))
-  //        return module;
-  //      return M::Error("could not load LLVM file");
-  //    });
-  //if (err) {
-  //  llvm::errs() << err.getError() << "\n";
-  //  return -1;
-  //}
-
-  //std::unique_ptr<llvm::ToolOutputFile> output = nullptr;
-  //if (clOptions.outputPrefix == "-") {
-  //  std::error_code error;
-  //  output = std::make_unique<llvm::ToolOutputFile>(
-  //      clOptions.outputPrefix, error, llvm::sys::fs::OF_None);
-  //  if (error)
-  //    exit(clOptions.options.reportError("Cannot open output file: '" +
-  //                                       clOptions.outputPrefix +
-  //                                       "':" + error.message()));
-  //}
-
-  //auto outputLambda =
-  //    [&](llvm::unique_function<LLVMModuleAndContext()> produceModule,
-  //        std::optional<int64_t> idx, unsigned numFunctionsBase) mutable {
-  //      LLVMModuleAndContext subModule = produceModule();
-  //      if (clOptions.outputPrefix == "-") {
-  //        output->os() << "##############################################\n";
-  //        if (idx)
-  //          output->os() << "# [LLVM Module Split: submodule " << *idx << "]\n";
-  //        else
-  //          output->os() << "# [LLVM Module Split: main module]\n";
-  //        output->os() << "##############################################\n";
-  //        output->os() << *subModule;
-  //        output->os() << "\n";
-  //      } else {
-  //        std::string outPath;
-  //        if (!idx) {
-  //          outPath = clOptions.outputPrefix + ".ll";
-  //        } else {
-  //          outPath =
-  //              (clOptions.outputPrefix + "." + Twine(*idx) + ".ll").str();
-  //        }
-  //        auto outFile = mlir::openOutputFile(outPath);
-  //        if (!outFile) {
-  //          exit(clOptions.options.reportError("Cannot open output file: '" +
-  //                                             outPath + "."));
-  //        }
-  //        outFile->os() << *subModule;
-  //        outFile->keep();
-  //        llvm::outs() << "Write llvm module to " << outPath << "\n";
-  //      }
-  //    };
-
-  //llvm::StringMap<llvm::GlobalValue::LinkageTypes> symbolLinkageTypes;
-  //if (clOptions.perFunctionSplit)
-  //  splitPerFunction(std::move(module), outputLambda, symbolLinkageTypes);
-  //else
-  //  splitPerExported(std::move(module), outputLambda);
-
-  //if (output)
-  //  output->keep();
-  //return 0;
 }
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 6204af588fc0a..8a2c0d88d90b2 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -2100,6 +2100,7 @@ cc_library(
         ":Core",
         ":IRReader",
         ":Support",
+        ":TransformUtils",
     ],
 )
 

>From 76c9cdfb3642daf5ea0881d59444189f2a2478e5 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Fri, 14 Mar 2025 18:07:22 -0400
Subject: [PATCH 10/17] Update cmake.

---
 llvm/lib/ModuleSplitter/CMakeLists.txt         | 8 +++++---
 llvm/tools/llvm-module-splitter/CMakeLists.txt | 4 +++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/ModuleSplitter/CMakeLists.txt b/llvm/lib/ModuleSplitter/CMakeLists.txt
index cba910855cadd..5f2f1fc1a3c09 100644
--- a/llvm/lib/ModuleSplitter/CMakeLists.txt
+++ b/llvm/lib/ModuleSplitter/CMakeLists.txt
@@ -5,9 +5,11 @@ add_llvm_component_library(LLVMModuleSplitter
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/ModuleSplitter
 
   LINK_COMPONENTS
-  Bitcode
   Core
-  IR
   IRReader
+  LLVMBitReader
+  LLVMBitWriter
+  LLVMModuleSplitter
   Support
-  )
+  TransformUtils
+)
diff --git a/llvm/tools/llvm-module-splitter/CMakeLists.txt b/llvm/tools/llvm-module-splitter/CMakeLists.txt
index 30ba638e4ffb8..f624d0acc32fc 100644
--- a/llvm/tools/llvm-module-splitter/CMakeLists.txt
+++ b/llvm/tools/llvm-module-splitter/CMakeLists.txt
@@ -1,11 +1,13 @@
 set(LLVM_LINK_COMPONENTS
   Core
   IRReader
+  LLVMBitReader
+  LLVMBitWriter
   LLVMModuleSplitter
   Support
+  TransformUtils
 )
 
 add_llvm_tool(llvm-module-splitter
   llvm-module-splitter.cpp
-
 )

>From be51194dd830027958e85bfe5a9f5a0d86253316 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Mon, 17 Mar 2025 09:44:38 -0400
Subject: [PATCH 11/17] Fix CMake.

---
 llvm/lib/ModuleSplitter/CMakeLists.txt         | 5 ++---
 llvm/tools/llvm-module-splitter/CMakeLists.txt | 6 +++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/ModuleSplitter/CMakeLists.txt b/llvm/lib/ModuleSplitter/CMakeLists.txt
index 5f2f1fc1a3c09..9b33ef48e4d69 100644
--- a/llvm/lib/ModuleSplitter/CMakeLists.txt
+++ b/llvm/lib/ModuleSplitter/CMakeLists.txt
@@ -7,9 +7,8 @@ add_llvm_component_library(LLVMModuleSplitter
   LINK_COMPONENTS
   Core
   IRReader
-  LLVMBitReader
-  LLVMBitWriter
-  LLVMModuleSplitter
+  BitReader
+  BitWriter
   Support
   TransformUtils
 )
diff --git a/llvm/tools/llvm-module-splitter/CMakeLists.txt b/llvm/tools/llvm-module-splitter/CMakeLists.txt
index f624d0acc32fc..8ab424219270f 100644
--- a/llvm/tools/llvm-module-splitter/CMakeLists.txt
+++ b/llvm/tools/llvm-module-splitter/CMakeLists.txt
@@ -1,9 +1,9 @@
 set(LLVM_LINK_COMPONENTS
   Core
   IRReader
-  LLVMBitReader
-  LLVMBitWriter
-  LLVMModuleSplitter
+  BitReader
+  BitWriter
+  ModuleSplitter
   Support
   TransformUtils
 )

>From ede15db4903d808887d6b36cfd1adb18d385798b Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Wed, 19 Mar 2025 00:23:16 -0400
Subject: [PATCH 12/17] test works.

---
 llvm/include/llvm/ModuleSplitter/ModuleSplitter.h   |  6 ++++--
 llvm/lib/ModuleSplitter/ModuleSplitter.cpp          | 13 ++++++++++---
 .../llvm-module-splitter/llvm-module-splitter.cpp   | 10 +++++-----
 utils/bazel/llvm-project-overlay/llvm/BUILD.bazel   |  1 -
 4 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h b/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h
index 112d85433dec5..4ff18beabd3f0 100644
--- a/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h
+++ b/llvm/include/llvm/ModuleSplitter/ModuleSplitter.h
@@ -69,8 +69,10 @@ void splitPerAnchored(LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
 
 /// Support for splitting an LLVM module into multiple parts with each part
 /// contains only one function.
-void splitPerFunction(LLVMModuleAndContext Module,
-                      LLVMSplitProcessFn ProcessFn);
+void splitPerFunction(
+    LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
+    llvm::StringMap<llvm::GlobalValue::LinkageTypes> &SymbolLinkageTypes,
+    unsigned NumFunctionBase);
 
 } // namespace llvm
 
diff --git a/llvm/lib/ModuleSplitter/ModuleSplitter.cpp b/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
index 6dd5c1b755926..5334eccfa991c 100644
--- a/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
+++ b/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
@@ -41,8 +41,8 @@ Expected<bool> LLVMModuleAndContext::create(
         CreateModule) {
   assert(!Module && "already have a module");
   auto ModuleOr = CreateModule(*Ctx);
-  if (Error Err = ModuleOr.takeError())
-    return Err;
+  if (!ModuleOr)
+    return ModuleOr.takeError();
 
   Module = std::move(*ModuleOr);
   return true;
@@ -216,12 +216,15 @@ static LLVMModuleAndContext readAndMaterializeDependencies(
   // TODO: Not sure how to make lazy loading metadata work.
   LLVMModuleAndContext Result;
   {
-    (void)Result.create(
+    auto CreateOr = Result.create(
         [&](llvm::LLVMContext &Ctx) -> Expected<std::unique_ptr<Module>> {
           return llvm::cantFail(
               llvm::getLazyBitcodeModule(Buf, Ctx,
                                          /*ShouldLazyLoadMetadata=*/false));
         });
+    if (!CreateOr)
+      LLVMModuleAndContext();
+
     Result->setModuleInlineAsm("");
   }
 
@@ -292,6 +295,7 @@ static LLVMModuleAndContext readAndMaterializeDependencies(
   return Result;
 }
 
+namespace llvm {
 /// support for splitting an LLVM module into multiple parts using exported
 /// functions as anchors, and pull in all dependency on the call stack into one
 /// module.
@@ -300,6 +304,7 @@ void splitPerAnchored(LLVMModuleAndContext Module, LLVMSplitProcessFn ProcessFn,
   LLVMModuleSplitterImpl Impl(std::move(Module));
   Impl.split(ProcessFn, Anchors);
 }
+} // namespace llvm
 
 void LLVMModuleSplitterImpl::split(
     LLVMSplitProcessFn ProcessFn,
@@ -527,6 +532,7 @@ class LLVMModulePerFunctionSplitterImpl {
 };
 } // namespace
 
+namespace llvm {
 /// support for splitting an LLVM module into multiple parts with each part
 /// contains only one function (with exception for coroutine related functions.)
 void splitPerFunction(
@@ -536,6 +542,7 @@ void splitPerFunction(
   LLVMModulePerFunctionSplitterImpl Impl(std::move(Module));
   Impl.split(ProcessFn, SymbolLinkageTypes, NumFunctionBase);
 }
+} // namespace llvm
 
 /// Split the LLVM module into multiple modules using the provided process
 /// function.
diff --git a/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp b/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp
index 6fbc4e8386c77..72c0d9afb7748 100644
--- a/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp
+++ b/llvm/tools/llvm-module-splitter/llvm-module-splitter.cpp
@@ -55,16 +55,15 @@ int main(int argc, char **argv) {
   llvm::cl::ParseCommandLineOptions(argc, argv);
 
   LLVMModuleAndContext M;
-  return 0;
   Expected<bool> Err =
       M.create([&](LLVMContext &Ctx) -> Expected<std::unique_ptr<Module>> {
-        if (std::unique_ptr<Module> m = readModule(Ctx, InputFilename))
-          return m;
+        if (std::unique_ptr<Module> M = readModule(Ctx, InputFilename))
+          return M;
         return make_error<StringError>("could not load LLVM file",
                                        inconvertibleErrorCode());
       });
 
-  if (Err) {
+  if (!Err) {
     llvm::errs() << toString(Err.takeError()) << "\n";
     return -1;
   }
@@ -119,8 +118,9 @@ int main(int argc, char **argv) {
       };
 
   llvm::StringMap<llvm::GlobalValue::LinkageTypes> SymbolLinkageTypes;
+
   if (PerFunctionSplit)
-    splitPerFunction(std::move(M), OutputLambda);
+    splitPerFunction(std::move(M), OutputLambda, SymbolLinkageTypes, 0);
   else {
     SmallVector<llvm::Function> Anchors;
     splitPerAnchored(std::move(M), OutputLambda, Anchors);
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 8a2c0d88d90b2..1ae5b3d940df7 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -5328,7 +5328,6 @@ cc_binary(
         "tools/llvm-module-splitter/*.cpp",
     ]),
     copts = llvm_copts,
-    stamp = 0,
     deps = [
         ":Core",
         ":IRReader",

>From c1b30e45e569b27bf80530a699d1eea75c279844 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Wed, 19 Mar 2025 00:52:10 -0400
Subject: [PATCH 13/17] Add first lit test.

---
 llvm/test/CMakeLists.txt                   |  1 +
 llvm/test/ModuleSplitter/exported-users.ll | 27 ++++++++++++++++++++++
 llvm/test/lit.cfg.py                       |  1 +
 3 files changed, 29 insertions(+)
 create mode 100644 llvm/test/ModuleSplitter/exported-users.ll

diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 1e4651210cd02..72b1cde7afde0 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -110,6 +110,7 @@ set(LLVM_TEST_DEPENDS
           llvm-mca
           llvm-ml
           llvm-modextract
+          llvm-module-splitter
           llvm-mt
           llvm-nm
           llvm-objcopy
diff --git a/llvm/test/ModuleSplitter/exported-users.ll b/llvm/test/ModuleSplitter/exported-users.ll
new file mode 100644
index 0000000000000..2dab41ab527f7
--- /dev/null
+++ b/llvm/test/ModuleSplitter/exported-users.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-module-splitter %s | FileCheck %s
+
+; CHECK-LABEL: [LLVM Module Split: submodule 0]
+
+; CHECK: define void @foo
+define void @foo() {
+  call void @baz()
+  ret void
+}
+
+; CHECK: define void @baz
+define void @baz() {
+  ret void
+}
+
+; CHECK: define void @bar
+define void @bar() {
+  call void @baz()
+  ret void
+}
+
+; CHECK-LABEL: [LLVM Module Split: submodule 1]
+
+; CHECK: define void @boo
+define void @boo() {
+  ret void
+}
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index aad7a088551b2..698b815b220c6 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -208,6 +208,7 @@ def get_asan_rtlib():
         "llvm-mc",
         "llvm-mca",
         "llvm-modextract",
+        "llvm-module-splitter",
         "llvm-nm",
         "llvm-objcopy",
         "llvm-objdump",

>From 8390263e2b8343ef89add2a4dc7a1dcf2e9581c7 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Sat, 22 Mar 2025 23:12:52 -0400
Subject: [PATCH 14/17] Add MCLinker skeleton.

---
 llvm/include/llvm/MCLinker/MCLinker.h         | 136 ++++++++++++++++++
 llvm/lib/CMakeLists.txt                       |   1 +
 llvm/lib/MCLinker/CMakeLists.txt              |  15 ++
 llvm/lib/MCLinker/MCLinker.cpp                |  15 ++
 .../llvm-project-overlay/llvm/BUILD.bazel     |  20 +++
 5 files changed, 187 insertions(+)
 create mode 100644 llvm/include/llvm/MCLinker/MCLinker.h
 create mode 100644 llvm/lib/MCLinker/CMakeLists.txt
 create mode 100644 llvm/lib/MCLinker/MCLinker.cpp

diff --git a/llvm/include/llvm/MCLinker/MCLinker.h b/llvm/include/llvm/MCLinker/MCLinker.h
new file mode 100644
index 0000000000000..9c8721700975e
--- /dev/null
+++ b/llvm/include/llvm/MCLinker/MCLinker.h
@@ -0,0 +1,136 @@
+//===- MCLinker.h - Linker at MC level------------- -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCLINKER_H
+#define LLVM_MCLINKER_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ModuleSplitter/ModuleSplitter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+/// This file defines data structures to help linking LLVM modules
+/// at MC level (right after codegen) and AsmPrint into one .o or .s file.
+/// This linking is needed because we parallelize the llvm opt and
+/// llc pipelines by splitting LLVMModule into multiple splits
+/// with symbol linkage changes.
+/// Linking at MC level helps to fix the temporary symbol linkage change,
+/// deduplicate multiple symbols among the splits.
+/// This allows mojo compilation to produce 1 .o file for each program
+/// (instead of one .a file with multiple .o files in .a) with reduced
+/// object file size (due to symbol dedup and linkage restoration).
+
+struct MCInfo {
+  MCInfo(std::unique_ptr<llvm::MachineModuleInfo> &&MachineModuleInfo,
+         LLVMModuleAndContext &&ModuleAndContext,
+         llvm::StringMap<const llvm::Function *> &FnNameToFnPtr,
+         std::unique_ptr<llvm::TargetMachine> &&TgtMachine,
+         std::unique_ptr<llvm::MCContext> &&McContext,
+         std::optional<int> SplitIdx)
+      : ModuleAndContext(std::move(ModuleAndContext)),
+        McContext(std::move(McContext)),
+        MachineModuleInfo(std::move(MachineModuleInfo)),
+        FnNameToFnPtr(std::move(FnNameToFnPtr)),
+        TgtMachine(std::move(TgtMachine)), SplitIdx(SplitIdx){};
+
+  MCInfo(MCInfo &&Other)
+      : ModuleBuf(std::move(Other.ModuleBuf)),
+        ModuleAndContext(std::move(Other.ModuleAndContext)),
+        McContext(std::move(Other.McContext)),
+        MachineModuleInfo(std::move(Other.MachineModuleInfo)),
+        FnNameToFnPtr(std::move(Other.FnNameToFnPtr)),
+        TgtMachine(std::move(Other.TgtMachine)), SplitIdx(Other.SplitIdx) {}
+
+  /// Serialize the llvm::Module into bytecode.
+  //  We will deserialize it back to put into
+  /// a different LLVMContext that is required for linking using llvm::Linker.
+  std::unique_ptr<WritableMemoryBuffer> ModuleBuf;
+
+  /// Keep original module split alive because llvm::Function is kept as
+  /// reference in llvm::MachineFunctions and will be used during codegen.
+  LLVMModuleAndContext ModuleAndContext;
+
+  /// ExternContext to MachineModuleInfo to work around the upstream bug
+  /// with the move constructor of MachineModuleInfo.
+  std::unique_ptr<llvm::MCContext> McContext;
+
+  /// This is where all the MachineFunction live that we need for AsmPrint.
+  std::unique_ptr<llvm::MachineModuleInfo> MachineModuleInfo;
+
+  /// llvm::Function name to llvm::Function* map for concatenating the
+  /// MachineFunctions map.
+  llvm::StringMap<const llvm::Function *> FnNameToFnPtr;
+
+  /// Keep targetMachine alive.
+  std::unique_ptr<llvm::TargetMachine> TgtMachine;
+
+  /// parallel llvm module split id, mostly used for debugging.
+  std::optional<int> SplitIdx;
+};
+
+struct SymbolAndMCInfo {
+  SymbolAndMCInfo() = default;
+
+  SymbolAndMCInfo(SymbolAndMCInfo &&Other)
+      : SymbolLinkageTypes(std::move(Other.SymbolLinkageTypes)),
+        McInfos(std::move(Other.McInfos)) {}
+
+  /// Clear member variables explicitly.
+  void clear();
+
+  /// Book-keeping original symbol linkage type if they are changed due to
+  /// splitting for parallel compilation.
+  llvm::StringMap<llvm::GlobalValue::LinkageTypes> SymbolLinkageTypes;
+
+  /// Vector of codegen results for each parallel split before AsmPrint.
+  SmallVector<std::unique_ptr<MCInfo>> McInfos;
+};
+
+class MCLinker {
+public:
+  MCLinker(SmallVectorImpl<SymbolAndMCInfo *> &SymbolAndMCInfos,
+           llvm::TargetMachine &TgtMachine,
+           llvm::StringMap<llvm::GlobalValue::LinkageTypes> SymbolLinkageTypes,
+           llvm::StringMap<unsigned> OriginalFnOrdering);
+
+  /// Link multiple MC results and AsmPrint into one .o file.
+  ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
+  linkAndPrint(StringRef ModuleName);
+
+private:
+  SmallVectorImpl<SymbolAndMCInfo *> &SymbolAndMCInfos;
+  llvm::TargetMachine &TgtMachine;
+  SmallVector<MCInfo *> McInfos;
+  LLVMModuleAndContext LinkedModule;
+
+  llvm::StringMap<llvm::GlobalValue::LinkageTypes> SymbolLinkageTypes;
+  llvm::StringMap<unsigned> OriginalFnOrdering;
+  llvm::MachineModuleInfoWrapperPass *MachineModInfoPass = nullptr;
+
+  /// Link llvm::Modules from each split.
+  Expected<bool> linkLLVMModules(StringRef ModuleName);
+
+  // /// Get llvm::Module and prepare MachineModuleInfoWrapperPass to print if
+  // /// there is only one split.
+  // llvm::Module *
+  // getModuleToPrintOneSplit(llvm::TargetMachine &LlvmTgtMachine);
+
+  /// Prepare MachineModuleInfo before AsmPrinting.
+  void prepareMachineModuleInfo(llvm::TargetMachine &LlvmTgtMachine);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
index 2201fcda0a7fd..5879491ec2c5f 100644
--- a/llvm/lib/CMakeLists.txt
+++ b/llvm/lib/CMakeLists.txt
@@ -24,6 +24,7 @@ add_subdirectory(Analysis)
 add_subdirectory(LTO)
 add_subdirectory(MC)
 add_subdirectory(MCA)
+add_subdirectory(MCLinker)
 add_subdirectory(ModuleSplitter)
 add_subdirectory(ObjCopy)
 add_subdirectory(Object)
diff --git a/llvm/lib/MCLinker/CMakeLists.txt b/llvm/lib/MCLinker/CMakeLists.txt
new file mode 100644
index 0000000000000..fe7103b105576
--- /dev/null
+++ b/llvm/lib/MCLinker/CMakeLists.txt
@@ -0,0 +1,15 @@
+add_llvm_component_library(LLVMMCLinker
+  MCLinker.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/ModuleSplitter
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/MCLinker
+
+  LINK_COMPONENTS
+  Core
+  IRReader
+  BitReader
+  BitWriter
+  Support
+  TransformUtils
+)
diff --git a/llvm/lib/MCLinker/MCLinker.cpp b/llvm/lib/MCLinker/MCLinker.cpp
new file mode 100644
index 0000000000000..34e7cfaa82273
--- /dev/null
+++ b/llvm/lib/MCLinker/MCLinker.cpp
@@ -0,0 +1,15 @@
+//===--- MCLinker.cpp - MCLinker --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/MCLinker/MCLinker.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "mclinker"
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 1f04672489169..8ecd17287c272 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -2085,6 +2085,26 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "MCLinker",
+    srcs = glob([
+        "lib/MCLinker/*.cpp",
+    ]),
+    hdrs = glob([
+        "include/llvm/ModuleSplitter/*.h",
+        "include/llvm/MCLinker/*.h",
+    ]),
+    copts = llvm_copts,
+    deps = [
+        ":BitReader",
+        ":BitWriter",
+        ":Core",
+        ":IRReader",
+        ":Support",
+        ":TransformUtils",
+    ],
+)
+
 cc_library(
     name = "ModuleSplitter",
     srcs = glob([

>From f1fb86d5fe0c6c97a2e0d632b45f66591d4a6e31 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Tue, 25 Mar 2025 12:12:49 -0400
Subject: [PATCH 15/17] checkpoint.

---
 llvm/include/llvm/MCLinker/MCLinker.h |  23 +-
 llvm/lib/MCLinker/MCLinker.cpp        | 320 ++++++++++++++++++++++++++
 2 files changed, 333 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/MCLinker/MCLinker.h b/llvm/include/llvm/MCLinker/MCLinker.h
index 9c8721700975e..41acbb2a451b7 100644
--- a/llvm/include/llvm/MCLinker/MCLinker.h
+++ b/llvm/include/llvm/MCLinker/MCLinker.h
@@ -32,18 +32,17 @@ namespace llvm {
 /// (instead of one .a file with multiple .o files in .a) with reduced
 /// object file size (due to symbol dedup and linkage restoration).
 
+//==============================================================================
+// MCInfo
+//==============================================================================
+
 struct MCInfo {
   MCInfo(std::unique_ptr<llvm::MachineModuleInfo> &&MachineModuleInfo,
          LLVMModuleAndContext &&ModuleAndContext,
          llvm::StringMap<const llvm::Function *> &FnNameToFnPtr,
          std::unique_ptr<llvm::TargetMachine> &&TgtMachine,
          std::unique_ptr<llvm::MCContext> &&McContext,
-         std::optional<int> SplitIdx)
-      : ModuleAndContext(std::move(ModuleAndContext)),
-        McContext(std::move(McContext)),
-        MachineModuleInfo(std::move(MachineModuleInfo)),
-        FnNameToFnPtr(std::move(FnNameToFnPtr)),
-        TgtMachine(std::move(TgtMachine)), SplitIdx(SplitIdx){};
+         std::optional<int> SplitIdx);
 
   MCInfo(MCInfo &&Other)
       : ModuleBuf(std::move(Other.ModuleBuf)),
@@ -56,7 +55,7 @@ struct MCInfo {
   /// Serialize the llvm::Module into bytecode.
   //  We will deserialize it back to put into
   /// a different LLVMContext that is required for linking using llvm::Linker.
-  std::unique_ptr<WritableMemoryBuffer> ModuleBuf;
+  std::unique_ptr<WritableMemoryBuffer> ModuleBuf = nullptr;
 
   /// Keep original module split alive because llvm::Function is kept as
   /// reference in llvm::MachineFunctions and will be used during codegen.
@@ -80,6 +79,11 @@ struct MCInfo {
   std::optional<int> SplitIdx;
 };
 
+
+//==============================================================================
+// SymbolAndMCInfo
+//==============================================================================
+
 struct SymbolAndMCInfo {
   SymbolAndMCInfo() = default;
 
@@ -102,8 +106,7 @@ class MCLinker {
 public:
   MCLinker(SmallVectorImpl<SymbolAndMCInfo *> &SymbolAndMCInfos,
            llvm::TargetMachine &TgtMachine,
-           llvm::StringMap<llvm::GlobalValue::LinkageTypes> SymbolLinkageTypes,
-           llvm::StringMap<unsigned> OriginalFnOrdering);
+           llvm::StringMap<llvm::GlobalValue::LinkageTypes> SymbolLinkageTypes);
 
   /// Link multiple MC results and AsmPrint into one .o file.
   ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
@@ -116,7 +119,7 @@ class MCLinker {
   LLVMModuleAndContext LinkedModule;
 
   llvm::StringMap<llvm::GlobalValue::LinkageTypes> SymbolLinkageTypes;
-  llvm::StringMap<unsigned> OriginalFnOrdering;
+  // llvm::StringMap<unsigned> OriginalFnOrdering;
   llvm::MachineModuleInfoWrapperPass *MachineModInfoPass = nullptr;
 
   /// Link llvm::Modules from each split.
diff --git a/llvm/lib/MCLinker/MCLinker.cpp b/llvm/lib/MCLinker/MCLinker.cpp
index 34e7cfaa82273..5411533097729 100644
--- a/llvm/lib/MCLinker/MCLinker.cpp
+++ b/llvm/lib/MCLinker/MCLinker.cpp
@@ -11,5 +11,325 @@
 
 #include "llvm/MCLinker/MCLinker.h"
 
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+
 using namespace llvm;
 #define DEBUG_TYPE "mclinker"
+
+//==============================================================================
+// MCInfo
+//==============================================================================
+
+/// Takes ownership of one split's codegen results and serializes the split's
+/// llvm::Module to bitcode, stored in ModuleBuf.
+///
+/// Note that FnNameToFnPtr is taken by lvalue reference but is moved from, so
+/// the caller's map should not be relied on after this call.
+MCInfo::MCInfo(std::unique_ptr<llvm::MachineModuleInfo> &&MachineModuleInfo,
+         LLVMModuleAndContext &&ModuleAndContext,
+         llvm::StringMap<const llvm::Function *> &FnNameToFnPtr,
+         std::unique_ptr<llvm::TargetMachine> &&TgtMachine,
+         std::unique_ptr<llvm::MCContext> &&McContext,
+         std::optional<int> SplitIdx)
+      : ModuleAndContext(std::move(ModuleAndContext)),
+        McContext(std::move(McContext)),
+        MachineModuleInfo(std::move(MachineModuleInfo)),
+        FnNameToFnPtr(std::move(FnNameToFnPtr)),
+        TgtMachine(std::move(TgtMachine)), SplitIdx(SplitIdx){
+  // The constructor parameters shadow the members of the same name, and the
+  // ModuleAndContext parameter was already moved from in the initializer list
+  // above. Go through `this->` so the module now owned by the member is the
+  // one that gets serialized.
+  std::string BufStr;
+  llvm::raw_string_ostream BufOS(BufStr);
+  llvm::WriteBitcodeToFile(*this->ModuleAndContext, BufOS);
+  ModuleBuf = WritableMemoryBuffer::getNewUninitMemBuffer(BufStr.size());
+  memcpy(ModuleBuf->getBufferStart(), BufStr.c_str(), BufStr.size());
+}
+
+//==============================================================================
+// SymbolAndMCInfo
+//==============================================================================
+
+void SymbolAndMCInfo::clear() {
+  SymbolLinkageTypes.clear();
+  McInfos.clear();
+}
+
+//==============================================================================
+// MCLinker
+//==============================================================================
+
+MCLinker::MCLinker(
+    SmallVectorImpl<SymbolAndMCInfo *> &SymbolAndMCInfos,
+    llvm::TargetMachine &TgtMachine,
+    llvm::StringMap<llvm::GlobalValue::LinkageTypes> SymbolLinkageTypes)
+    : SymbolAndMCInfos(SymbolAndMCInfos), TgtMachine(TgtMachine),
+      SymbolLinkageTypes(std::move(SymbolLinkageTypes)) {
+
+  llvm::TargetMachine &LLVMTgtMachine =
+      static_cast<llvm::TargetMachine &>(TgtMachine);
+
+  MachineModInfoPass =
+      new llvm::MachineModuleInfoWrapperPass(&LLVMTgtMachine);
+}
+
+
+Expected<bool> MCLinker::linkLLVMModules(StringRef moduleName) {
+  Expected<bool> createModuleResult =
+      LinkedModule.create([&](llvm::LLVMContext &ctx) {
+        return std::make_unique<llvm::Module>(moduleName, ctx);
+      });
+
+  if (createModuleResult.isError())
+    return Error("failed to create an empty LLVMModule for MCLinker");
+
+  llvm::Linker linker(*linkedModule);
+
+  for (auto [i, smcInfos] : llvm::enumerate(symbolAndMCInfos)) {
+    for (auto &[key, value] : smcInfos->symbolLinkageTypes)
+      symbolLinkageTypes.insert({key, value});
+
+    for (auto [j, mcInfo] : llvm::enumerate(smcInfos->mcInfos)) {
+      mcInfos.push_back(mcInfo.get());
+
+      // Modules have to be in the same LLVMContext to be linked.
+      llvm::Expected<std::unique_ptr<llvm::Module>> moduleOr =
+          llvm::parseBitcodeFile(
+              llvm::MemoryBufferRef(
+                  StringRef(mcInfo->moduleBuf->getBufferStart(),
+                            mcInfo->moduleBuf->getBufferSize()),
+                  ""),
+              linkedModule->getContext());
+      if (!moduleOr)
+        return Error("failed to serialize post-llc modules");
+
+      std::unique_ptr<llvm::Module> module = std::move(moduleOr.get());
+      if (linker.linkInModule(std::move(module)))
+        return Error("failed to link post-llc modules");
+      mcInfo->mcContext->setUseNamesOnTempLabels(true);
+    }
+  }
+
+  // Restore linkage type.
+  for (llvm::GlobalValue &global : linkedModule->globals()) {
+    if (!global.hasWeakLinkage())
+      continue;
+    auto iter = symbolLinkageTypes.find(global.getName().str());
+    if (iter == symbolLinkageTypes.end())
+      continue;
+
+    global.setLinkage(iter->second);
+    global.setDSOLocal(true);
+  }
+
+  for (llvm::Function &fn : linkedModule->functions()) {
+    if (!fn.hasWeakLinkage())
+      continue;
+
+    auto iter = symbolLinkageTypes.find(fn.getName().str());
+    if (iter == symbolLinkageTypes.end())
+      continue;
+
+    fn.setLinkage(iter->second);
+    fn.setDSOLocal(true);
+  }
+
+  return {};
+}
+
+void MCLinker::prepareMachineModuleInfo(
+    llvm::TargetMachine &llvmTargetMachine) {
+  for (auto [i, smcInfos] : llvm::enumerate(symbolAndMCInfos)) {
+    for (auto [j, mcInfo] : llvm::enumerate(smcInfos->mcInfos)) {
+      // Move MachineFunctions from each split's codegen result
+      // into machineModInfoPass to print out together in one .o
+      llvm::DenseMap<const llvm::Function *,
+                     std::unique_ptr<llvm::MachineFunction>> &machineFunctions =
+          getMachineFunctionsFromMachineModuleInfo(*mcInfo->machineModuleInfo);
+
+      llvm::StringMap<const llvm::Function *> &fnNameToFnPtr =
+          mcInfo->fnNameToFnPtr;
+
+      mcInfo->machineModuleInfo->getContext().setObjectFileInfo(
+          llvmTargetMachine.getObjFileLowering());
+
+      for (auto &fn : linkedModule->functions()) {
+        if (fn.isDeclaration())
+          continue;
+        if (machineModInfoPass->getMMI().getMachineFunction(fn))
+          continue;
+
+        auto fnPtrIter = fnNameToFnPtr.find(fn.getName().str());
+        if (fnPtrIter == fnNameToFnPtr.end())
+          continue;
+        auto mfPtrIter = machineFunctions.find(fnPtrIter->second);
+        if (mfPtrIter == machineFunctions.end())
+          continue;
+
+        llvm::Function &origFn = mfPtrIter->second->getFunction();
+
+        machineModInfoPass->getMMI().insertFunction(
+            fn, std::move(mfPtrIter->second));
+
+        // Restore function linkage types.
+        if (!origFn.hasWeakLinkage())
+          continue;
+
+        auto iter = symbolLinkageTypes.find(fn.getName().str());
+        if (iter == symbolLinkageTypes.end())
+          continue;
+
+        origFn.setLinkage(iter->second);
+        origFn.setDSOLocal(true);
+      }
+
+      // Restore global variable linkage types.
+      for (auto &global : mcInfo->moduleAndContext->globals()) {
+        if (!global.hasWeakLinkage())
+          continue;
+        auto iter = symbolLinkageTypes.find(global.getName().str());
+        if (iter == symbolLinkageTypes.end())
+          continue;
+
+        global.setLinkage(iter->second);
+        global.setDSOLocal(true);
+      }
+
+      // Release memory as soon as possible to reduce peak memory footprint.
+      mcInfo->machineModuleInfo.reset();
+      mcInfo->fnNameToFnPtr.clear();
+      mcInfo->moduleBuf.reset();
+    }
+  }
+}
+
+llvm::Module *
+MCLinker::getModuleToPrintOneSplit(llvm::TargetMachine &llvmTargetMachine) {
+  auto &mcInfo = symbolAndMCInfos[0]->mcInfos[0];
+
+  llvm::DenseMap<const llvm::Function *, std::unique_ptr<llvm::MachineFunction>>
+      &machineFunctions =
+          getMachineFunctionsFromMachineModuleInfo(*mcInfo->machineModuleInfo);
+
+  mcInfo->machineModuleInfo->getContext().setObjectFileInfo(
+      llvmTargetMachine.getObjFileLowering());
+
+  for (auto &fn : mcInfo->moduleAndContext->functions()) {
+    if (fn.isDeclaration())
+      continue;
+
+    auto mfPtrIter = machineFunctions.find(&fn);
+    if (mfPtrIter == machineFunctions.end())
+      continue;
+
+    machineModInfoPass->getMMI().insertFunction(fn,
+                                                std::move(mfPtrIter->second));
+  }
+
+  mcInfo->mcContext->setUseNamesOnTempLabels(true);
+  // Release memory as soon as possible to reduce peak memory footprint.
+  mcInfo->machineModuleInfo.reset();
+  mcInfo->fnNameToFnPtr.clear();
+  mcInfo->moduleBuf.reset();
+  return &(*mcInfo->moduleAndContext);
+}
+
+/// Links the codegen results of all splits and runs the AsmPrint pipeline over
+/// them, returning the buffer that was handed to that pipeline.
+///
+/// With exactly one split, that split's module is printed directly. Otherwise
+/// the splits are first linked at the llvm::Module level (linkLLVMModules) and
+/// their MachineFunctions are gathered (prepareMachineModuleInfo).
+///
+/// \param moduleName   name given to the linked (or single) module.
+/// \param emitAssembly print an assembly file when true, an object file
+///                     otherwise.
+ErrorOr<WriteableBufferRef> MCLinker::linkAndPrint(StringRef moduleName,
+                                                   bool emitAssembly) {
+
+  llvm::TargetMachine &llvmTargetMachine =
+      static_cast<llvm::TargetMachine &>(targetMachine);
+
+  llvmTargetMachine.Options.MCOptions.AsmVerbose = options.verboseOutput;
+  llvmTargetMachine.Options.MCOptions.PreserveAsmComments =
+      options.verboseOutput;
+
+  bool hasOneSplit =
+      symbolAndMCInfos.size() == 1 && symbolAndMCInfos[0]->mcInfos.size() == 1;
+
+  llvm::Module *oneSplitModule = nullptr;
+
+  if (!hasOneSplit) {
+    if (isNVPTXBackend(options)) {
+      // The NVPTX backend populates its AnnotationCache during both the
+      // llvm-opt and llc pipeline passes, but only clears it at the end of
+      // codegen in AsmPrint. To avoid false hits on stale entries we need to
+      // make sure that llvm-opt and llc are using the same llvm::Module so
+      // that the cache can be properly cleaned. We currently achieve this by
+      // keeping only one split for NVPTX compilation.
+      return Error("NVPTX compilation should not have multiple splits.");
+    }
+
+    // link at llvm::Module level.
+    ErrorOrSuccess lmResult = linkLLVMModules(moduleName);
+    if (lmResult.isError())
+      return Error(lmResult.getError());
+
+    prepareMachineModuleInfo(llvmTargetMachine);
+
+    // Function ordering may be changed in the linkedModule due to Linker,
+    // but the original order matters for NVPTX backend to generate function
+    // declaration properly to avoid use before def/decl illegal instructions.
+    // Sort the linkedModule's functions back to its original order
+    // (only definitions matter, declarations don't).
+    // NOTE(review): with the early return for NVPTX above, this branch looks
+    // unreachable at the moment -- confirm whether it is still needed.
+    if (isNVPTXBackend(options)) {
+      // Map every function to an integer key so that the comparison below is
+      // a strict weak ordering (the comparator must return false for
+      // equivalent elements): definitions with a recorded position come
+      // first in that order, then definitions without one, then declarations.
+      auto orderKey = [&](const llvm::Function &fn) -> unsigned long long {
+        if (fn.isDeclaration())
+          return ~0ULL;
+        auto iter = originalFnOrdering.find(fn.getName());
+        if (iter == originalFnOrdering.end())
+          return ~0ULL - 1;
+        return iter->second;
+      };
+
+      linkedModule->getFunctionList().sort(
+          [&](const auto &lhs, const auto &rhs) {
+            return orderKey(lhs) < orderKey(rhs);
+          });
+    }
+  } else {
+    oneSplitModule = getModuleToPrintOneSplit(llvmTargetMachine);
+    oneSplitModule->setModuleIdentifier(moduleName);
+  }
+
+  // Prepare AsmPrint pipeline.
+  WriteableBufferRef linkedObj = WriteableBuffer::get();
+
+  llvm::legacy::PassManager passMgr;
+  // Add an appropriate TargetLibraryInfo pass for the module's triple.
+  llvm::TargetLibraryInfoImpl targetLibInfo(llvm::Triple(options.targetTriple));
+
+  // Add AsmPrint pass and run the pass manager.
+  passMgr.add(new llvm::TargetLibraryInfoWrapperPass(targetLibInfo));
+  if (KGEN::addPassesToAsmPrint(options, llvmTargetMachine, passMgr, *linkedObj,
+                                emitAssembly
+                                    ? llvm::CodeGenFileType::AssemblyFile
+                                    : llvm::CodeGenFileType::ObjectFile,
+                                true, machineModInfoPass, mcInfos)) {
+    // Release some of the AsyncValue memory to avoid
+    // wrong version of LLVMContext destructor being called due to
+    // multiple LLVM being statically linked in dylibs that have
+    // access to this code path.
+    for (SymbolAndMCInfo *smcInfo : symbolAndMCInfos)
+      smcInfo->clear();
+
+    return Error("failed to add to ObjectFile Print pass");
+  }
+
+  const_cast<llvm::TargetLoweringObjectFile *>(
+      llvmTargetMachine.getObjFileLowering())
+      ->Initialize(machineModInfoPass->getMMI().getContext(), targetMachine);
+
+  llvm::Module &moduleToRun = hasOneSplit ? *oneSplitModule : *linkedModule;
+  passMgr.run(moduleToRun);
+
+  // Release some of the AsyncValue memory to avoid
+  // wrong version of LLVMContext destructor being called due to
+  // multiple LLVM being statically linked in dylibs that have
+  // access to this code path.
+  for (SymbolAndMCInfo *smcInfo : symbolAndMCInfos)
+    smcInfo->clear();
+
+  return linkedObj;
+}

>From 4ff42900bf3c035553e6936c2ce698342c386842 Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Tue, 25 Mar 2025 12:13:10 -0400
Subject: [PATCH 16/17] Add return.

---
 llvm/lib/ModuleSplitter/ModuleSplitter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/ModuleSplitter/ModuleSplitter.cpp b/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
index 5334eccfa991c..3138fbbcc58ce 100644
--- a/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
+++ b/llvm/lib/ModuleSplitter/ModuleSplitter.cpp
@@ -223,7 +223,7 @@ static LLVMModuleAndContext readAndMaterializeDependencies(
                                          /*ShouldLazyLoadMetadata=*/false));
         });
     if (!CreateOr)
-      LLVMModuleAndContext();
+      return LLVMModuleAndContext();
 
     Result->setModuleInlineAsm("");
   }

>From e8879e1e50af9885636885d55ae6aceb660999ed Mon Sep 17 00:00:00 2001
From: Weiwei Chen <weiwei.chen at modular.com>
Date: Tue, 25 Mar 2025 16:19:57 -0400
Subject: [PATCH 17/17] Add MCLinker and friends.

---
 llvm/include/llvm/MCLinker/MCLinker.h   |  10 +-
 llvm/include/llvm/MCLinker/MCPipeline.h |  37 +++
 llvm/lib/MCLinker/CMakeLists.txt        |   7 +-
 llvm/lib/MCLinker/MCLinker.cpp          | 320 ++++++++++--------------
 llvm/lib/MCLinker/MCLinkerUtils.cpp     | 156 ++++++++++++
 llvm/lib/MCLinker/MCLinkerUtils.h       |  63 +++++
 llvm/lib/MCLinker/MCPipeline.cpp        | 159 ++++++++++++
 7 files changed, 554 insertions(+), 198 deletions(-)
 create mode 100644 llvm/include/llvm/MCLinker/MCPipeline.h
 create mode 100644 llvm/lib/MCLinker/MCLinkerUtils.cpp
 create mode 100644 llvm/lib/MCLinker/MCLinkerUtils.h
 create mode 100644 llvm/lib/MCLinker/MCPipeline.cpp

diff --git a/llvm/include/llvm/MCLinker/MCLinker.h b/llvm/include/llvm/MCLinker/MCLinker.h
index 41acbb2a451b7..7050f181a97ad 100644
--- a/llvm/include/llvm/MCLinker/MCLinker.h
+++ b/llvm/include/llvm/MCLinker/MCLinker.h
@@ -10,8 +10,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_MCLINKER_H
-#define LLVM_MCLINKER_H
+#ifndef LLVM_MCLINKER_MCLINKER_H
+#define LLVM_MCLINKER_MCLINKER_H
 
 #include "llvm/ADT/StringMap.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -79,7 +79,6 @@ struct MCInfo {
   std::optional<int> SplitIdx;
 };
 
-
 //==============================================================================
 // SymbolAndMCInfo
 //==============================================================================
@@ -109,8 +108,9 @@ class MCLinker {
            llvm::StringMap<llvm::GlobalValue::LinkageTypes> SymbolLinkageTypes);
 
   /// Link multiple MC results and AsmPrint into one .o file.
-  ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
-  linkAndPrint(StringRef ModuleName);
+  Expected<std::unique_ptr<WritableMemoryBuffer>>
+  linkAndPrint(StringRef ModuleName, llvm::CodeGenFileType CodegenType,
+               bool VerboseOutput);
 
 private:
   SmallVectorImpl<SymbolAndMCInfo *> &SymbolAndMCInfos;
diff --git a/llvm/include/llvm/MCLinker/MCPipeline.h b/llvm/include/llvm/MCLinker/MCPipeline.h
new file mode 100644
index 0000000000000..1d78d996ace49
--- /dev/null
+++ b/llvm/include/llvm/MCLinker/MCPipeline.h
@@ -0,0 +1,37 @@
+//===- MCPipeline.h - Passes to run with MCLinker  --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCLINKER_MCPIPELINE_H
+#define LLVM_MCLINKER_MCPIPELINE_H
+
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/MCLinker/MCLinker.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+namespace mclinker {
+/// Build a pipeline that does machine specific codegen but stops before
+/// AsmPrint.
+bool addPassesToEmitMC(llvm::TargetMachine &, llvm::legacy::PassManagerBase &,
+                       llvm::raw_pwrite_stream &, bool,
+                       llvm::MachineModuleInfoWrapperPass *, unsigned);
+
+/// Build a pipeline that does AsmPrint only.
+bool addPassesToAsmPrint(llvm::TargetMachine &, llvm::legacy::PassManagerBase &,
+                         llvm::raw_pwrite_stream &, llvm::CodeGenFileType, bool,
+                         llvm::MachineModuleInfoWrapperPass *,
+                         llvm::SmallVectorImpl<MCInfo *> &);
+} // namespace mclinker
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/MCLinker/CMakeLists.txt b/llvm/lib/MCLinker/CMakeLists.txt
index fe7103b105576..67728c5fb0c34 100644
--- a/llvm/lib/MCLinker/CMakeLists.txt
+++ b/llvm/lib/MCLinker/CMakeLists.txt
@@ -1,15 +1,18 @@
 add_llvm_component_library(LLVMMCLinker
+  MCLinkerUtils.cpp
   MCLinker.cpp
+  MCPipeline.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/ModuleSplitter
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/MCLinker
 
   LINK_COMPONENTS
-  Core
-  IRReader
   BitReader
   BitWriter
+  Core
+  IRReader
+  Linker
   Support
   TransformUtils
 )
diff --git a/llvm/lib/MCLinker/MCLinker.cpp b/llvm/lib/MCLinker/MCLinker.cpp
index 5411533097729..178933e803c0e 100644
--- a/llvm/lib/MCLinker/MCLinker.cpp
+++ b/llvm/lib/MCLinker/MCLinker.cpp
@@ -8,11 +8,19 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #include "llvm/MCLinker/MCLinker.h"
+#include "MCLinkerUtils.h"
+#include "llvm/MCLinker/MCPipeline.h"
 
+#include "MCLinkerUtils.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 #define DEBUG_TYPE "mclinker"
@@ -22,16 +30,16 @@ using namespace llvm;
 //==============================================================================
 
 MCInfo::MCInfo(std::unique_ptr<llvm::MachineModuleInfo> &&MachineModuleInfo,
-         LLVMModuleAndContext &&ModuleAndContext,
-         llvm::StringMap<const llvm::Function *> &FnNameToFnPtr,
-         std::unique_ptr<llvm::TargetMachine> &&TgtMachine,
-         std::unique_ptr<llvm::MCContext> &&McContext,
-         std::optional<int> SplitIdx)
-      : ModuleAndContext(std::move(ModuleAndContext)),
-        McContext(std::move(McContext)),
-        MachineModuleInfo(std::move(MachineModuleInfo)),
-        FnNameToFnPtr(std::move(FnNameToFnPtr)),
-        TgtMachine(std::move(TgtMachine)), SplitIdx(SplitIdx){
+               LLVMModuleAndContext &&ModuleAndContext,
+               llvm::StringMap<const llvm::Function *> &FnNameToFnPtr,
+               std::unique_ptr<llvm::TargetMachine> &&TgtMachine,
+               std::unique_ptr<llvm::MCContext> &&McContext,
+               std::optional<int> SplitIdx)
+    : ModuleAndContext(std::move(ModuleAndContext)),
+      McContext(std::move(McContext)),
+      MachineModuleInfo(std::move(MachineModuleInfo)),
+      FnNameToFnPtr(std::move(FnNameToFnPtr)),
+      TgtMachine(std::move(TgtMachine)), SplitIdx(SplitIdx) {
   std::string BufStr;
   llvm::raw_string_ostream BufOS(BufStr);
   llvm::WriteBitcodeToFile(*ModuleAndContext, BufOS);
@@ -62,274 +70,204 @@ MCLinker::MCLinker(
   llvm::TargetMachine &LLVMTgtMachine =
       static_cast<llvm::TargetMachine &>(TgtMachine);
 
-  MachineModInfoPass =
-      new llvm::MachineModuleInfoWrapperPass(&LLVMTgtMachine);
+  MachineModInfoPass = new llvm::MachineModuleInfoWrapperPass(&LLVMTgtMachine);
 }
 
-
 Expected<bool> MCLinker::linkLLVMModules(StringRef moduleName) {
-  Expected<bool> createModuleResult =
+  Expected<bool> CreateModuleOr =
       LinkedModule.create([&](llvm::LLVMContext &ctx) {
         return std::make_unique<llvm::Module>(moduleName, ctx);
       });
 
-  if (createModuleResult.isError())
-    return Error("failed to create an empty LLVMModule for MCLinker");
+  if (!CreateModuleOr) {
+    return make_error<StringError>(
+        "failed to create an empty LLVMModule for MCLinker",
+        inconvertibleErrorCode());
+  }
 
-  llvm::Linker linker(*linkedModule);
+  llvm::Linker ModuleLinker(*LinkedModule);
 
-  for (auto [i, smcInfos] : llvm::enumerate(symbolAndMCInfos)) {
-    for (auto &[key, value] : smcInfos->symbolLinkageTypes)
-      symbolLinkageTypes.insert({key, value});
+  for (auto [i, SmcInfos] : llvm::enumerate(SymbolAndMCInfos)) {
+    for (auto &[key, value] : SmcInfos->SymbolLinkageTypes)
+      SymbolLinkageTypes.insert({key, value});
 
-    for (auto [j, mcInfo] : llvm::enumerate(smcInfos->mcInfos)) {
-      mcInfos.push_back(mcInfo.get());
+    for (auto [j, McInfo] : llvm::enumerate(SmcInfos->McInfos)) {
+      McInfos.push_back(McInfo.get());
 
       // Modules have to be in the same LLVMContext to be linked.
-      llvm::Expected<std::unique_ptr<llvm::Module>> moduleOr =
+      llvm::Expected<std::unique_ptr<llvm::Module>> ModuleOr =
           llvm::parseBitcodeFile(
               llvm::MemoryBufferRef(
-                  StringRef(mcInfo->moduleBuf->getBufferStart(),
-                            mcInfo->moduleBuf->getBufferSize()),
+                  StringRef(McInfo->ModuleBuf->getBufferStart(),
+                            McInfo->ModuleBuf->getBufferSize()),
                   ""),
-              linkedModule->getContext());
-      if (!moduleOr)
-        return Error("failed to serialize post-llc modules");
-
-      std::unique_ptr<llvm::Module> module = std::move(moduleOr.get());
-      if (linker.linkInModule(std::move(module)))
-        return Error("failed to link post-llc modules");
-      mcInfo->mcContext->setUseNamesOnTempLabels(true);
+              LinkedModule->getContext());
+      if (!ModuleOr) {
+        return make_error<StringError>("failed to serialize post-llc modules",
+                                       inconvertibleErrorCode());
+      }
+
+      std::unique_ptr<llvm::Module> M = std::move(ModuleOr.get());
+
+      if (ModuleLinker.linkInModule(std::move(M))) {
+        return make_error<StringError>("failed to link post-llc modules",
+                                       inconvertibleErrorCode());
+      }
+
+      McInfo->McContext->setUseNamesOnTempLabels(true);
     }
   }
 
-  // Restore linkage type.
-  for (llvm::GlobalValue &global : linkedModule->globals()) {
-    if (!global.hasWeakLinkage())
+  // Restore linkage type!
+  for (llvm::GlobalValue &G : LinkedModule->globals()) {
+    if (!G.hasWeakLinkage())
       continue;
-    auto iter = symbolLinkageTypes.find(global.getName().str());
-    if (iter == symbolLinkageTypes.end())
+    auto Iter = SymbolLinkageTypes.find(G.getName().str());
+    if (Iter == SymbolLinkageTypes.end())
       continue;
 
-    global.setLinkage(iter->second);
-    global.setDSOLocal(true);
+    G.setLinkage(Iter->second);
+    G.setDSOLocal(true);
   }
 
-  for (llvm::Function &fn : linkedModule->functions()) {
-    if (!fn.hasWeakLinkage())
+  for (llvm::Function &F : LinkedModule->functions()) {
+    if (!F.hasWeakLinkage())
       continue;
 
-    auto iter = symbolLinkageTypes.find(fn.getName().str());
-    if (iter == symbolLinkageTypes.end())
+    auto Iter = SymbolLinkageTypes.find(F.getName().str());
+    if (Iter == SymbolLinkageTypes.end())
       continue;
 
-    fn.setLinkage(iter->second);
-    fn.setDSOLocal(true);
+    F.setLinkage(Iter->second);
+    F.setDSOLocal(true);
   }
 
-  return {};
+  return true;
 }
 
 void MCLinker::prepareMachineModuleInfo(
     llvm::TargetMachine &llvmTargetMachine) {
-  for (auto [i, smcInfos] : llvm::enumerate(symbolAndMCInfos)) {
-    for (auto [j, mcInfo] : llvm::enumerate(smcInfos->mcInfos)) {
+  for (auto [i, SmcInfos] : llvm::enumerate(SymbolAndMCInfos)) {
+    for (auto [j, McInfo] : llvm::enumerate(SmcInfos->McInfos)) {
       // Move MachineFunctions from each split's codegen result
       // into machineModInfoPass to print out together in one .o
       llvm::DenseMap<const llvm::Function *,
                      std::unique_ptr<llvm::MachineFunction>> &machineFunctions =
-          getMachineFunctionsFromMachineModuleInfo(*mcInfo->machineModuleInfo);
+          llvm::mclinker::getMachineFunctionsFromMachineModuleInfo(
+              *McInfo->MachineModuleInfo);
 
-      llvm::StringMap<const llvm::Function *> &fnNameToFnPtr =
-          mcInfo->fnNameToFnPtr;
+      llvm::StringMap<const llvm::Function *> &FnNameToFnPtr =
+          McInfo->FnNameToFnPtr;
 
-      mcInfo->machineModuleInfo->getContext().setObjectFileInfo(
-          llvmTargetMachine.getObjFileLowering());
+      McInfo->MachineModuleInfo->getContext().setObjectFileInfo(
+          TgtMachine.getObjFileLowering());
 
-      for (auto &fn : linkedModule->functions()) {
-        if (fn.isDeclaration())
+      for (auto &Fn : LinkedModule->functions()) {
+        if (Fn.isDeclaration())
           continue;
-        if (machineModInfoPass->getMMI().getMachineFunction(fn))
+        if (MachineModInfoPass->getMMI().getMachineFunction(Fn))
           continue;
 
-        auto fnPtrIter = fnNameToFnPtr.find(fn.getName().str());
-        if (fnPtrIter == fnNameToFnPtr.end())
+        auto FnPtrIter = FnNameToFnPtr.find(Fn.getName().str());
+        if (FnPtrIter == FnNameToFnPtr.end())
           continue;
-        auto mfPtrIter = machineFunctions.find(fnPtrIter->second);
-        if (mfPtrIter == machineFunctions.end())
+        auto MfPtrIter = machineFunctions.find(FnPtrIter->second);
+        if (MfPtrIter == machineFunctions.end())
           continue;
 
-        llvm::Function &origFn = mfPtrIter->second->getFunction();
+        llvm::Function &OrigFn = MfPtrIter->second->getFunction();
 
-        machineModInfoPass->getMMI().insertFunction(
-            fn, std::move(mfPtrIter->second));
+        MachineModInfoPass->getMMI().insertFunction(
+            Fn, std::move(MfPtrIter->second));
 
         // Restore function linkage types.
-        if (!origFn.hasWeakLinkage())
+        if (!OrigFn.hasWeakLinkage())
           continue;
 
-        auto iter = symbolLinkageTypes.find(fn.getName().str());
-        if (iter == symbolLinkageTypes.end())
+        auto Iter = SymbolLinkageTypes.find(Fn.getName().str());
+        if (Iter == SymbolLinkageTypes.end())
           continue;
 
-        origFn.setLinkage(iter->second);
-        origFn.setDSOLocal(true);
+        OrigFn.setLinkage(Iter->second);
+        OrigFn.setDSOLocal(true);
       }
 
       // Restore global variable linkage types.
-      for (auto &global : mcInfo->moduleAndContext->globals()) {
-        if (!global.hasWeakLinkage())
+      for (auto &G : McInfo->ModuleAndContext->globals()) {
+        if (!G.hasWeakLinkage())
           continue;
-        auto iter = symbolLinkageTypes.find(global.getName().str());
-        if (iter == symbolLinkageTypes.end())
+        auto Iter = SymbolLinkageTypes.find(G.getName().str());
+        if (Iter == SymbolLinkageTypes.end())
           continue;
 
-        global.setLinkage(iter->second);
-        global.setDSOLocal(true);
+        G.setLinkage(Iter->second);
+        G.setDSOLocal(true);
       }
 
       // Release memory as soon as possible to reduce peak memory footprint.
-      mcInfo->machineModuleInfo.reset();
-      mcInfo->fnNameToFnPtr.clear();
-      mcInfo->moduleBuf.reset();
+      McInfo->MachineModuleInfo.reset();
+      McInfo->FnNameToFnPtr.clear();
+      McInfo->ModuleBuf.reset();
     }
   }
 }
 
-llvm::Module *
-MCLinker::getModuleToPrintOneSplit(llvm::TargetMachine &llvmTargetMachine) {
-  auto &mcInfo = symbolAndMCInfos[0]->mcInfos[0];
-
-  llvm::DenseMap<const llvm::Function *, std::unique_ptr<llvm::MachineFunction>>
-      &machineFunctions =
-          getMachineFunctionsFromMachineModuleInfo(*mcInfo->machineModuleInfo);
-
-  mcInfo->machineModuleInfo->getContext().setObjectFileInfo(
-      llvmTargetMachine.getObjFileLowering());
-
-  for (auto &fn : mcInfo->moduleAndContext->functions()) {
-    if (fn.isDeclaration())
-      continue;
-
-    auto mfPtrIter = machineFunctions.find(&fn);
-    if (mfPtrIter == machineFunctions.end())
-      continue;
-
-    machineModInfoPass->getMMI().insertFunction(fn,
-                                                std::move(mfPtrIter->second));
-  }
-
-  mcInfo->mcContext->setUseNamesOnTempLabels(true);
-  // Release memory as soon as possible to reduce peak memory footprint.
-  mcInfo->machineModuleInfo.reset();
-  mcInfo->fnNameToFnPtr.clear();
-  mcInfo->moduleBuf.reset();
-  return &(*mcInfo->moduleAndContext);
-}
-
-ErrorOr<WriteableBufferRef> MCLinker::linkAndPrint(StringRef moduleName,
-                                                   bool emitAssembly) {
-
-  llvm::TargetMachine &llvmTargetMachine =
-      static_cast<llvm::TargetMachine &>(targetMachine);
-
-  llvmTargetMachine.Options.MCOptions.AsmVerbose = options.verboseOutput;
-  llvmTargetMachine.Options.MCOptions.PreserveAsmComments =
-      options.verboseOutput;
+Expected<std::unique_ptr<WritableMemoryBuffer>>
+MCLinker::linkAndPrint(StringRef ModuleName, llvm::CodeGenFileType CodegenType,
+                       bool VerboseOutput) {
 
-  bool hasOneSplit =
-      symbolAndMCInfos.size() == 1 && symbolAndMCInfos[0]->mcInfos.size() == 1;
+  llvm::TargetMachine &LLVMTgtMachine =
+      static_cast<llvm::TargetMachine &>(TgtMachine);
 
-  llvm::Module *oneSplitModule = nullptr;
+  LLVMTgtMachine.Options.MCOptions.AsmVerbose = VerboseOutput;
+  LLVMTgtMachine.Options.MCOptions.PreserveAsmComments = VerboseOutput;
 
-  if (!hasOneSplit) {
-    if (isNVPTXBackend(options)) {
-      // For NVPTX backend to avoid false hit
-      // with its stale AnnotationCache which is populated during both
-      // llvm-opt and llc pipeline passes but is only cleared at the end of
-      // codegen in AsmPrint. We need to make sure that llvm-opt and llc
-      // are using the sname llvm::Module to that the cache can be properly
-      // cleaned. We currently achieve this by keeping only one split for NVPTX
-      // compilation.
-      return Error("NVPTX compilation should have multiple splits.");
-    }
+  // link at llvm::Module level.
+  Expected<bool> LMResultOr = linkLLVMModules(ModuleName);
+  if (!LMResultOr)
+    return LMResultOr.takeError();
 
-    // link at llvm::Module level.
-    ErrorOrSuccess lmResult = linkLLVMModules(moduleName);
-    if (lmResult.isError())
-      return Error(lmResult.getError());
-
-    prepareMachineModuleInfo(llvmTargetMachine);
-
-    // Function ordering may be changed in the linkedModule due to Linker,
-    // but the original order matters for NVPTX backend to generate function
-    // declaration properly to avoid use before def/decl illegal instructions.
-    // Sort the linkedModule's functions back to to its original order
-    // (only definition matter, declaration doesn't).
-    if (isNVPTXBackend(options)) {
-      linkedModule->getFunctionList().sort(
-          [&](const auto &lhs, const auto &rhs) {
-            if (lhs.isDeclaration() && rhs.isDeclaration())
-              return true;
-
-            if (lhs.isDeclaration())
-              return false;
-
-            if (rhs.isDeclaration())
-              return true;
-
-            auto iter1 = originalFnOrdering.find(lhs.getName());
-            if (iter1 == originalFnOrdering.end())
-              return true;
-            auto iter2 = originalFnOrdering.find(rhs.getName());
-            if (iter2 == originalFnOrdering.end())
-              return true;
-
-            return iter1->second < iter2->second;
-          });
-    }
-  } else {
-    oneSplitModule = getModuleToPrintOneSplit(llvmTargetMachine);
-    oneSplitModule->setModuleIdentifier(moduleName);
-  }
+  prepareMachineModuleInfo(LLVMTgtMachine);
 
   // Prepare AsmPrint pipeline.
-  WriteableBufferRef linkedObj = WriteableBuffer::get();
-
-  llvm::legacy::PassManager passMgr;
+  llvm::legacy::PassManager PassMgr;
+  SmallString<1024> Buf;
+  raw_svector_ostream BufOS(Buf);
   // Add an appropriate TargetLibraryInfo pass for the module's triple.
-  llvm::TargetLibraryInfoImpl targetLibInfo(llvm::Triple(options.targetTriple));
+  llvm::TargetLibraryInfoImpl TargetLibInfo(TgtMachine.getTargetTriple());
 
   // Add AsmPrint pass and run the pass manager.
-  passMgr.add(new llvm::TargetLibraryInfoWrapperPass(targetLibInfo));
-  if (KGEN::addPassesToAsmPrint(options, llvmTargetMachine, passMgr, *linkedObj,
-                                emitAssembly
-                                    ? llvm::CodeGenFileType::AssemblyFile
-                                    : llvm::CodeGenFileType::ObjectFile,
-                                true, machineModInfoPass, mcInfos)) {
+  PassMgr.add(new llvm::TargetLibraryInfoWrapperPass(TargetLibInfo));
+  if (llvm::mclinker::addPassesToAsmPrint(LLVMTgtMachine, PassMgr, BufOS,
+                                          CodegenType, true, MachineModInfoPass,
+                                          McInfos)) {
     // Release some of the AsyncValue memory to avoid
     // wrong version of LLVMContext destructor being called due to
     // multiple LLVM being statically linked in dylibs that have
     // access to this code path.
-    for (SymbolAndMCInfo *smcInfo : symbolAndMCInfos)
-      smcInfo->clear();
+    for (SymbolAndMCInfo *SmcInfo : SymbolAndMCInfos)
+      SmcInfo->clear();
 
-    return Error("failed to add to ObjectFile Print pass");
+    return make_error<StringError>("failed to add to ObjectFile Print pass",
+                                   inconvertibleErrorCode());
   }
 
+  std::unique_ptr<WritableMemoryBuffer> LinkedObj =
+      WritableMemoryBuffer::getNewUninitMemBuffer(Buf.size());
+  memcpy(LinkedObj->getBufferStart(), Buf.c_str(), Buf.size());
+
   const_cast<llvm::TargetLoweringObjectFile *>(
-      llvmTargetMachine.getObjFileLowering())
-      ->Initialize(machineModInfoPass->getMMI().getContext(), targetMachine);
+      LLVMTgtMachine.getObjFileLowering())
+      ->Initialize(MachineModInfoPass->getMMI().getContext(), TgtMachine);
 
-  llvm::Module &moduleToRun = hasOneSplit ? *oneSplitModule : *linkedModule;
-  passMgr.run(moduleToRun);
+  PassMgr.run(*LinkedModule);
 
   // Release some of the AsyncValue memory to avoid
   // wrong version of LLVMContext destructor being called due to
   // multiple LLVM being statically linked in dylibs that have
   // access to this code path.
-  for (SymbolAndMCInfo *smcInfo : symbolAndMCInfos)
-    smcInfo->clear();
+  for (SymbolAndMCInfo *SmcInfo : SymbolAndMCInfos)
+    SmcInfo->clear();
 
-  return linkedObj;
+  return LinkedObj;
 }
diff --git a/llvm/lib/MCLinker/MCLinkerUtils.cpp b/llvm/lib/MCLinker/MCLinkerUtils.cpp
new file mode 100644
index 0000000000000..c01487e7a9e11
--- /dev/null
+++ b/llvm/lib/MCLinker/MCLinkerUtils.cpp
@@ -0,0 +1,156 @@
+//===--- MCLinkerUtils.cpp - MCLinkerUtils-----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCLinkerUtils.h"
+#include "llvm/CodeGen/CodeGenTargetMachineImpl.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace {
+
+// Helpers to access private field of llvm::MachineModuleInfo::MachineFunctions.
+using MFAccessor = llvm::DenseMap<const llvm::Function *,
+                                  std::unique_ptr<llvm::MachineFunction>>
+    llvm::MachineModuleInfo::*;
+MFAccessor getMFAccessor();
+template <MFAccessor Instance> struct RobberMFFromMachineModuleInfo {
+  friend MFAccessor getMFAccessor() { return Instance; }
+};
+template struct RobberMFFromMachineModuleInfo<
+    &llvm::MachineModuleInfo::MachineFunctions>;
+
+// Helpers to access private field of llvm::MachineFunction::FunctionNumber.
+using MFNumberAccessor = unsigned llvm::MachineFunction::*;
+MFNumberAccessor getMFNumberAccessor();
+template <MFNumberAccessor Instance> struct RobberMFNumberFromMachineFunction {
+  friend MFNumberAccessor getMFNumberAccessor() { return Instance; }
+};
+template struct RobberMFNumberFromMachineFunction<
+    &llvm::MachineFunction::FunctionNumber>;
+
+// Helpers to access private field of llvm::MachineFunction::STI.
+using STIAccessor = const llvm::TargetSubtargetInfo *llvm::MachineFunction::*;
+STIAccessor getSTIAccessor();
+template <STIAccessor Instance> struct RobberSTIFromMachineFunction {
+  friend STIAccessor getSTIAccessor() { return Instance; }
+};
+template struct RobberSTIFromMachineFunction<&llvm::MachineFunction::STI>;
+
+// Helpers to access private field of llvm::MachineModuleInfo::NextFnNum.
+using NextFnNumAccessor = unsigned llvm::MachineModuleInfo::*;
+NextFnNumAccessor getNextFnNumAccessor();
+template <NextFnNumAccessor Instance>
+struct RobberNextFnNumFromMachineModuleInfo {
+  friend NextFnNumAccessor getNextFnNumAccessor() { return Instance; }
+};
+template struct RobberNextFnNumFromMachineModuleInfo<
+    &llvm::MachineModuleInfo::NextFnNum>;
+
+// Helpers to access private field of llvm::TargetMachine::STI.
+using MCSubtargetInfoAccessor =
+    std::unique_ptr<const llvm::MCSubtargetInfo> llvm::TargetMachine::*;
+MCSubtargetInfoAccessor getMCSubtargetInfo();
+template <MCSubtargetInfoAccessor Instance>
+struct RobberMCSubtargetInfoFromTargetMachine {
+  friend MCSubtargetInfoAccessor getMCSubtargetInfo() { return Instance; }
+};
+template struct RobberMCSubtargetInfoFromTargetMachine<
+    &llvm::TargetMachine::STI>;
+
+// Helpers to access private functions
+template <typename Tag> struct LLVMPrivateFnAccessor {
+  /* export it ... */
+  using type = typename Tag::type;
+  static type Ptr;
+};
+
+template <typename Tag>
+typename LLVMPrivateFnAccessor<Tag>::type LLVMPrivateFnAccessor<Tag>::Ptr;
+
+template <typename Tag, typename Tag::type p>
+struct LLVMPrivateFnAccessorRob : LLVMPrivateFnAccessor<Tag> {
+  /* fill it ... */
+  struct Filler {
+    Filler() { LLVMPrivateFnAccessor<Tag>::Ptr = p; }
+  };
+  static Filler FillerObj;
+};
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wglobal-constructors"
+template <typename Tag, typename Tag::type P>
+typename LLVMPrivateFnAccessorRob<Tag, P>::Filler
+    LLVMPrivateFnAccessorRob<Tag, P>::FillerObj;
+#pragma GCC diagnostic pop
+
+// Helper to call the private function llvm::MCContext::getSymbolTableEntry.
+struct MCContextGetSymbolEntryAccessor {
+  using type = llvm::MCSymbolTableEntry &(llvm::MCContext::*)(llvm::StringRef);
+};
+template struct LLVMPrivateFnAccessorRob<MCContextGetSymbolEntryAccessor,
+                                         &llvm::MCContext::getSymbolTableEntry>;
+
+// Helper to call non-public llvm::CodeGenTargetMachineImpl::reset().
+struct TargetMachineClearSubtargetMapAccessor {
+  using type = void (llvm::CodeGenTargetMachineImpl::*)();
+};
+template struct LLVMPrivateFnAccessorRob<
+    TargetMachineClearSubtargetMapAccessor,
+    &llvm::CodeGenTargetMachineImpl::reset>;
+
+} // namespace
+
+llvm::DenseMap<const llvm::Function *, std::unique_ptr<llvm::MachineFunction>> &
+llvm::mclinker::getMachineFunctionsFromMachineModuleInfo(
+    llvm::MachineModuleInfo &MachineModuleInfo) {
+  return std::invoke(getMFAccessor(), MachineModuleInfo);
+}
+
+void llvm::mclinker::setMachineFunctionNumber(llvm::MachineFunction &Mf,
+                                              unsigned Number) {
+  unsigned &OrigNumber = std::invoke(getMFNumberAccessor(), Mf);
+  OrigNumber = Number;
+}
+
+void llvm::mclinker::setNextFnNum(llvm::MachineModuleInfo &MMI,
+                                  unsigned Value) {
+  unsigned &NextFnNum = std::invoke(getNextFnNumAccessor(), MMI);
+  NextFnNum = Value;
+}
+
+llvm::MCSymbolTableEntry &
+llvm::mclinker::getMCContextSymbolTableEntry(llvm::StringRef Name,
+                                             llvm::MCContext &McContext) {
+  return (McContext.*
+          LLVMPrivateFnAccessor<MCContextGetSymbolEntryAccessor>::Ptr)(Name);
+}
+
+void llvm::mclinker::releaseTargetMachineConstants(llvm::TargetMachine &TM) {
+  std::unique_ptr<const llvm::MCSubtargetInfo> &McSubtargetInfo =
+      std::invoke(getMCSubtargetInfo(), TM);
+  McSubtargetInfo.reset();
+
+  llvm::CodeGenTargetMachineImpl &TgtMachine =
+      static_cast<llvm::CodeGenTargetMachineImpl &>(TM);
+  (TgtMachine.*
+   LLVMPrivateFnAccessor<TargetMachineClearSubtargetMapAccessor>::Ptr)();
+}
+
+void llvm::mclinker::resetSubtargetInfo(llvm::TargetMachine &Dst,
+                                        llvm::MachineModuleInfo &MMI) {
+
+  llvm::DenseMap<const llvm::Function *, std::unique_ptr<llvm::MachineFunction>>
+      &MFs = getMachineFunctionsFromMachineModuleInfo(MMI);
+
+  for (auto &[Fn, MF] : MFs) {
+    const llvm::TargetSubtargetInfo *NewSTI = Dst.getSubtargetImpl(*Fn);
+    const llvm::TargetSubtargetInfo *&STI = std::invoke(getSTIAccessor(), MF);
+    STI = NewSTI;
+  }
+}
diff --git a/llvm/lib/MCLinker/MCLinkerUtils.h b/llvm/lib/MCLinker/MCLinkerUtils.h
new file mode 100644
index 0000000000000..497f786334885
--- /dev/null
+++ b/llvm/lib/MCLinker/MCLinkerUtils.h
@@ -0,0 +1,63 @@
+//===- MCLinkerUtils.h - MCLinker utility Functions -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCLINKER_MCLINKERUTILS_H
+#define LLVM_MCLINKER_MCLINKERUTILS_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbolTableEntry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+namespace mclinker {
+// A few helper functions to access LLVM private class/struct members:
+// http://bloglitb.blogspot.com/2010/07/access-to-private-members-thats-easy.html
+
+/// Wrappers around accessors for private fields of LLVM data structures, for
+/// linking at MC-level where a few things need to be globalized such as:
+/// - llvm::MachineFunction's numbering,
+/// - all unique_ptrs of llvm::MachineFunctions in each split to be put
+///   together for the final AsmPrint
+/// - MCSymbol propagation for external global symbols to each split's
+///   MCContext to avoid duplicates for X86's OrcJIT execution engine.
+
+/// Get private field
+/// DenseMap<const Function*, std::unique_ptr<MachineFunction>> MachineFunctions
+/// from llvm::MachineModuleInfo.
+llvm::DenseMap<const llvm::Function *, std::unique_ptr<llvm::MachineFunction>> &
+getMachineFunctionsFromMachineModuleInfo(llvm::MachineModuleInfo &);
+
+/// Set private field FunctionNumber in llvm::MachineFunction.
+void setMachineFunctionNumber(llvm::MachineFunction &, unsigned);
+
+/// Set private field NextFnNum in llvm::MachineModuleInfo.
+void setNextFnNum(llvm::MachineModuleInfo &, unsigned);
+
+/// Call private member function
+/// MCSymbolTableEntry &getSymbolTableEntry(StringRef Name)
+/// from llvm::MCContext.
+llvm::MCSymbolTableEntry &getMCContextSymbolTableEntry(llvm::StringRef,
+                                                       llvm::MCContext &);
+
+/// Release the TargetMachine's MCSubtargetInfo and call its reset().
+void releaseTargetMachineConstants(llvm::TargetMachine &);
+
+/// Re-point each MachineFunction's STI in the MMI to the TargetMachine's.
+void resetSubtargetInfo(llvm::TargetMachine &, llvm::MachineModuleInfo &);
+
+} // namespace mclinker
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/MCLinker/MCPipeline.cpp b/llvm/lib/MCLinker/MCPipeline.cpp
new file mode 100644
index 0000000000000..9ea69d1ab226d
--- /dev/null
+++ b/llvm/lib/MCLinker/MCPipeline.cpp
@@ -0,0 +1,159 @@
+//===--- MCPipeline.cpp - MCPipeline ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCLinker/MCPipeline.h"
+
+#include "MCLinkerUtils.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+
+using namespace llvm;
+using namespace llvm::mclinker;
+
+namespace {
+/// Immutable pass whose initialization sets NextFnNum of \p MMI to \p Base.
+class SetMachineFunctionBasePass : public ImmutablePass {
+  MachineModuleInfo &MMI; // Module info whose function counter gets set.
+  unsigned Base;          // Value handed to setNextFnNum().
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  SetMachineFunctionBasePass(MachineModuleInfo &MMI, unsigned Base);
+
+  // Module-level hooks; the definitions follow the class.
+  bool doInitialization(Module &) override;
+  bool doFinalization(Module &) override;
+};
+} // namespace
+
+char SetMachineFunctionBasePass::ID = 0;
+
+SetMachineFunctionBasePass::SetMachineFunctionBasePass(MachineModuleInfo &MMI,
+                                                       unsigned Base)
+    : ImmutablePass(ID), MMI(MMI), Base(Base) {}
+
+// Initialization: hand Base to MMI as its next function number.
+bool SetMachineFunctionBasePass::doInitialization(Module & /*M*/) {
+  mclinker::setNextFnNum(MMI, Base);
+  return false; // The module argument is never touched.
+}
+
+bool SetMachineFunctionBasePass::doFinalization(Module & /*M*/) {
+  return false; // Finalization: nothing to do.
+}
+
+/// Build a pipeline that does machine specific codegen but stops before
+/// AsmPrint. \p Out is not used by this stage. Returns true if failed.
+bool llvm::mclinker::addPassesToEmitMC(
+    llvm::TargetMachine &TgtMachine, llvm::legacy::PassManagerBase &PM,
+    llvm::raw_pwrite_stream &Out, bool DisableVerify,
+    llvm::MachineModuleInfoWrapperPass *MMIWP, unsigned NumFnBase) {
+  // Targets may override createPassConfig to provide a target-specific
+  // subclass; bail out if none is provided, as addPassesToAsmPrint does.
+  TargetPassConfig *PassConfig = TgtMachine.createPassConfig(PM);
+  if (!PassConfig)
+    return true;
+
+  // Set PassConfig options provided by TargetMachine.
+  PassConfig->setDisableVerify(DisableVerify);
+  PM.add(PassConfig);
+  PM.add(MMIWP);
+
+  // Register the pass that sets MMI's NextFnNum to NumFnBase on init.
+  PM.add(new SetMachineFunctionBasePass(MMIWP->getMMI(), NumFnBase));
+
+  if (PassConfig->addISelPasses())
+    return true;
+  PassConfig->addMachinePasses();
+  PassConfig->setInitialized();
+  return false;
+}
+
+/// Machine function pass that copies external MCSymbols found in a function's
+/// MCContext into the MCContexts of the other module splits so that they are
+/// unique across all splits. Required for ORCJIT (not for generating .o).
+namespace {
+class SyncX86SymbolTables : public MachineFunctionPass {
+  // Per-split MC info; each entry's McContext is a propagation target.
+  SmallVectorImpl<MCInfo *> &McInfos;
+  // Names of the external symbols that were already propagated.
+  DenseSet<StringRef> ExternSymbols;
+  // Copy Value under Name into every MCContext but SrcCtx that lacks it.
+  void populateSymbol(StringRef Name, const MCSymbolTableValue &Value,
+                      MCContext *SrcCtx);
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit SyncX86SymbolTables(SmallVectorImpl<MCInfo *> &McInfos);
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+char SyncX86SymbolTables::ID = 0;
+// ExternSymbols is default-constructed, i.e. nothing has been synced yet.
+SyncX86SymbolTables::SyncX86SymbolTables(SmallVectorImpl<MCInfo *> &McInfos)
+    : MachineFunctionPass(ID), McInfos(McInfos) {}
+
+void SyncX86SymbolTables::populateSymbol(StringRef Name,
+                                         const llvm::MCSymbolTableValue &Value,
+                                         MCContext *SrcCtx) {
+  for (MCInfo *Info : McInfos) {
+    MCContext &DstCtx = *Info->McContext;
+    if (&DstCtx == SrcCtx)
+      continue;
+    // Only fill in entries for which the destination has no symbol yet.
+    auto &Slot = mclinker::getMCContextSymbolTableEntry(Name, DstCtx).second;
+    if (Slot.Symbol)
+      continue;
+    Slot.Symbol = Value.Symbol;
+    Slot.NextUniqueID = Value.NextUniqueID;
+    Slot.Used = Value.Used;
+  }
+}
+
+bool SyncX86SymbolTables::runOnMachineFunction(MachineFunction &MF) {
+  MCContext &SrcCtx = MF.getContext();
+  for (auto &[SymName, Entry] : SrcCtx.getSymbols()) {
+    if (!Entry.Symbol || !Entry.Symbol->isExternal())
+      continue;
+    // insert() reports whether the name is new, so each one is synced once.
+    if (ExternSymbols.insert(SymName).second)
+      populateSymbol(SymName, Entry, &SrcCtx);
+  }
+  return false;
+}
+
+/// Build a pipeline that only runs AsmPrint, plus, on X86 targets, the pass
+/// syncing external symbols across \p McInfos. Returns true if failed.
+bool llvm::mclinker::addPassesToAsmPrint(
+    llvm::TargetMachine &TgtMachine, llvm::legacy::PassManagerBase &PM,
+    llvm::raw_pwrite_stream &Out, llvm::CodeGenFileType FileType,
+    bool DisableVerify, llvm::MachineModuleInfoWrapperPass *MMIWP,
+    llvm::SmallVectorImpl<MCInfo *> &McInfos) {
+  TargetPassConfig *Config = TgtMachine.createPassConfig(PM);
+  if (Config == nullptr)
+    return true;
+  // Apply the TargetMachine-provided options, then register the passes.
+  Config->setDisableVerify(DisableVerify);
+  PM.add(Config);
+  PM.add(MMIWP);
+  Config->setInitialized();
+
+  MCContext &Ctx = MMIWP->getMMI().getContext();
+  bool Failed = TgtMachine.addAsmPrinter(PM, Out, nullptr, FileType, Ctx);
+  // The symbol sync pass is queued regardless of the AsmPrinter outcome.
+  if (TgtMachine.getTargetTriple().isX86())
+    PM.add(new SyncX86SymbolTables(McInfos));
+  return Failed;
+}



More information about the llvm-commits mailing list