[llvm] [offload][SYCL] Add Module splitting by categories. (PR #131347)

Maksim Sabianin via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 25 05:46:44 PDT 2025


================
@@ -70,6 +77,163 @@ static cl::opt<std::string>
     MCPU("mcpu", cl::desc("Target CPU, ignored if --mtriple is not used"),
          cl::value_desc("cpu"), cl::cat(SplitCategory));
 
+enum class SplitByCategoryType {
+  SBCT_ByModuleId, // "module-id": keyed by the "module-id" fn attribute.
+  SBCT_ByKernel,   // "kernel": one output module per kernel.
+  SBCT_None,       // Initial value of the --split-by-category option.
+};
+
+static cl::opt<SplitByCategoryType> SplitByCategory(
+    "split-by-category", // Accepted values: "module-id" and "kernel".
+    cl::desc("Split by category. If present, splitting by category is used "
+             "with the specified categorization type."),
+    cl::Optional, cl::init(SplitByCategoryType::SBCT_None),
+    cl::values(clEnumValN(SplitByCategoryType::SBCT_ByModuleId, "module-id",
+                          "one output module per translation unit marked with "
+                          "\"module-id\" attribute"),
+               clEnumValN(SplitByCategoryType::SBCT_ByKernel, "kernel",
+                          "one output module per kernel")),
+    cl::cat(SplitCategory)); // SBCT_None has no spelling; it is only the init.
+
+// -S: emit textual LLVM assembly instead of the default output format.
+static cl::opt<bool> OutputAssembly("S",
+                                    cl::desc("Write output as LLVM assembly"),
+                                    cl::cat(SplitCategory));
+
+/// Writes \p Content followed by a newline to the file at \p Path.
+///
+/// If the file cannot be opened, a diagnostic is printed to errs() and the
+/// process exits with status 1, so this function only returns on success.
+/// File-local helper, hence internal linkage.
+static void writeStringToFile(StringRef Content, StringRef Path) {
+  std::error_code EC;
+  raw_fd_ostream OS(Path, EC);
+  if (EC) {
+    errs() << formatv("error opening file: {0}, error: {1}\n", Path,
+                      EC.message());
+    exit(1);
+  }
+
+  OS << Content << '\n';
+}
+
+/// Writes module \p M to the file at \p Path: as textual LLVM IR when
+/// \p OutputAssembly is true, as bitcode otherwise.
+///
+/// If the file cannot be opened, a diagnostic is printed to errs() and the
+/// process exits with status 1. Note that the \p OutputAssembly parameter
+/// shadows the command-line option of the same name; only the parameter is
+/// consulted here. File-local helper, hence internal linkage.
+static void writeModuleToFile(const Module &M, StringRef Path,
+                              bool OutputAssembly) {
+  int FD = -1;
+  if (std::error_code EC = sys::fs::openFileForWrite(Path, FD)) {
+    errs() << formatv("error opening file: {0}, error: {1}\n", Path,
+                      EC.message());
+    exit(1);
+  }
+
+  // The stream takes ownership of FD and closes it on destruction.
+  raw_fd_ostream OS(FD, /*shouldClose=*/true);
+  if (OutputAssembly)
+    M.print(OS, /*AAW=*/nullptr);
+  else
+    WriteBitcodeToFile(M, OS);
+}
+
+/// FunctionCategorizer is used for splitting by category, either by module-id
+/// or by kernel. It doesn't provide categories for functions other than
+/// kernels. The categorizer computes a string key for the given Function and
+/// records the association between that key and an integer category. If the
+/// key already belongs to some category, then the corresponding integer
+/// category is returned; otherwise the key gets the next unused integer.
+class FunctionCategorizer {
+public:
+  FunctionCategorizer(SplitByCategoryType Type) : Type(Type) {}
+
+  // Move-only: the key -> category table is not meant to be duplicated.
+  FunctionCategorizer() = delete;
+  FunctionCategorizer(const FunctionCategorizer &) = delete;
+  FunctionCategorizer &operator=(const FunctionCategorizer &) = delete;
+  FunctionCategorizer(FunctionCategorizer &&) = default;
+  FunctionCategorizer &operator=(FunctionCategorizer &&) = default;
+
+  /// Returns the integer category for the given \p F, or std::nullopt if
+  /// \p F isn't a kernel definition. Categories are numbered from 0 in the
+  /// order in which their keys are first seen.
+  std::optional<int> operator()(const Function &F) {
+    if (!isEntryPoint(F))
+      return std::nullopt; // skip the function.
+
+    // try_emplace inserts only when the key is new, so a known key yields its
+    // existing category and NextID is simply discarded. One lookup suffices.
+    const int NextID = static_cast<int>(StrKeyToID.size());
+    return StrKeyToID.try_emplace(computeFunctionCategory(Type, F), NextID)
+        .first->second;
+  }
+
+private:
+  /// Returns true if \p F has a body and uses one of the kernel calling
+  /// conventions (SPIR_KERNEL, AMDGPU_KERNEL or PTX_Kernel).
+  static bool isEntryPoint(const Function &F) {
+    if (F.isDeclaration())
+      return false;
+
+    return F.getCallingConv() == CallingConv::SPIR_KERNEL ||
+           F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+           F.getCallingConv() == CallingConv::PTX_Kernel;
+  }
+
+  /// Builds the map key for \p F: KeyTag followed by the function name
+  /// (SBCT_ByKernel) or by the value of its "module-id" attribute
+  /// (SBCT_ByModuleId). A kernel that lacks the attribute contributes an
+  /// empty value, so all such kernels share one category.
+  static SmallString<0> computeFunctionCategory(SplitByCategoryType Type,
+                                                const Function &F) {
+    static constexpr char ATTR_MODULE_ID[] = "module-id";
+    SmallString<0> Key;
+    // The tag keeps every real key distinct from the reserved empty ("") and
+    // tombstone ("-") keys of KeyInfo. Without it, an empty "module-id" value
+    // or a kernel named "-" would collide with a DenseMap sentinel.
+    Key.push_back(KeyTag);
+    switch (Type) {
+    case SplitByCategoryType::SBCT_ByKernel:
+      Key.append(F.getName());
+      break;
+    case SplitByCategoryType::SBCT_ByModuleId:
+      Key.append(F.getFnAttribute(ATTR_MODULE_ID).getValueAsString());
+      break;
+    default:
+      llvm_unreachable("unexpected mode.");
+    }
+
+    return Key;
+  }
+
+private:
+  /// First byte of every key stored in StrKeyToID. It must differ from the
+  /// first byte of the tombstone key ("-").
+  static constexpr char KeyTag = '#';
+
+  /// DenseMap traits for owning string keys. The two reserved keys below can
+  /// never be produced by computeFunctionCategory because they do not start
+  /// with KeyTag.
+  struct KeyInfo {
+    static SmallString<0> getEmptyKey() { return SmallString<0>(""); }
+
+    static SmallString<0> getTombstoneKey() { return SmallString<0>("-"); }
+
+    static bool isEqual(const SmallString<0> &LHS, const SmallString<0> &RHS) {
+      return LHS == RHS;
+    }
+
+    static unsigned getHashValue(const SmallString<0> &S) {
+      return llvm::hash_value(StringRef(S));
+    }
+  };
+
+  SplitByCategoryType Type;
+  DenseMap<SmallString<0>, int, KeyInfo> StrKeyToID;
+};
+
+/// Runs GlobalDCE over \p M to delete globals that are no longer reachable.
+///
+/// A fresh analysis manager is used with only PassInstrumentationAnalysis
+/// registered. File-local helper, hence internal linkage.
+static void cleanupModule(Module &M) {
+  ModuleAnalysisManager MAM;
+  MAM.registerPass([] { return PassInstrumentationAnalysis(); });
+  ModulePassManager MPM;
+  MPM.addPass(GlobalDCEPass()); // Delete unreachable globals.
+  MPM.run(M, MAM);
+}
+
+Error runSplitModuleByCategory(std::unique_ptr<Module> M) {
+  size_t OutputID = 0;
+  auto PostSplitCallback = [&](std::unique_ptr<Module> MPart) {
+    if (verifyModule(*MPart)) {
+      errs() << "Broken Module!\n";
+      exit(1);
----------------
maksimsab wrote:

Yes, I repeated the already used approach below in `HandleModulePart`.

https://github.com/llvm/llvm-project/pull/131347


More information about the llvm-commits mailing list