[llvm] [offload][SYCL] Add SYCL Module splitting (PR #119713)
Maksim Sabianin via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 08:42:36 PDT 2025
https://github.com/maksimsab updated https://github.com/llvm/llvm-project/pull/119713
>From bf489154f2e923e0f10d51b7f79075c36fb6ccd6 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Thu, 12 Dec 2024 06:25:20 -0800
Subject: [PATCH 01/14] [offload][SYCL] Add SYCL Module splitting
This patch adds SYCL Module splitting - the necessary step in the SYCL
compilation pipeline. Only 2 splitting modes are being added: by kernel and by
source.
---
.../llvm/Transforms/Utils/SYCLModuleSplit.h | 71 +++
.../include/llvm/Transforms/Utils/SYCLUtils.h | 26 +
llvm/lib/Transforms/Utils/CMakeLists.txt | 2 +
llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp | 513 ++++++++++++++++++
llvm/lib/Transforms/Utils/SYCLUtils.cpp | 27 +
.../device-code-split/amd-kernel-split.ll | 17 +
.../device-code-split/auto-module-split-1.ll | 120 ++++
.../auto-module-split-func-ptr.ll | 50 ++
.../device-code-split/basic-module-split.ll | 122 +++++
.../complex-indirect-call-chain.ll | 67 +++
.../one-kernel-per-module.ll | 135 +++++
.../per-reqd-sub-group-size-split-1.ll | 105 ++++
.../per-reqd-wg-size-split-1.ll | 105 ++++
.../split-with-kernel-declarations.ll | 53 ++
llvm/tools/llvm-split/llvm-split.cpp | 74 +++
15 files changed, 1487 insertions(+)
create mode 100644 llvm/include/llvm/Transforms/Utils/SYCLModuleSplit.h
create mode 100644 llvm/include/llvm/Transforms/Utils/SYCLUtils.h
create mode 100644 llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp
create mode 100644 llvm/lib/Transforms/Utils/SYCLUtils.cpp
create mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/amd-kernel-split.ll
create mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-1.ll
create mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-func-ptr.ll
create mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/basic-module-split.ll
create mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
create mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll
create mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-sub-group-size-split-1.ll
create mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-wg-size-split-1.ll
create mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/split-with-kernel-declarations.ll
diff --git a/llvm/include/llvm/Transforms/Utils/SYCLModuleSplit.h b/llvm/include/llvm/Transforms/Utils/SYCLModuleSplit.h
new file mode 100644
index 0000000000000..4df3e0321e9cd
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/SYCLModuleSplit.h
@@ -0,0 +1,71 @@
+//===-------- SYCLModuleSplit.h - module split ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Functionality to split a module into callgraphs. A callgraph here is a set
+// of entry points with all functions reachable from them via a call. The result
+// of the split is new modules containing corresponding callgraph.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYCL_MODULE_SPLIT_H
+#define LLVM_SYCL_MODULE_SPLIT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+#include <memory>
+#include <optional>
+#include <string>
+
+namespace llvm {
+
+class Module;
+
+enum class IRSplitMode {
+ IRSM_PER_TU, // one module per translation unit
+ IRSM_PER_KERNEL, // one module per kernel
+ IRSM_NONE // no splitting
+};
+
+/// \returns IRSplitMode value if \p S is recognized. Otherwise, std::nullopt is
+/// returned.
+std::optional<IRSplitMode> convertStringToSplitMode(StringRef S);
+
+/// Describes one split LLVM Module together with its accompanying
+/// information. Split modules are stored on disk rather than kept in memory
+/// because of the high RAM consumption of the whole splitting process.
+struct SYCLSplitModule {
+ std::string ModuleFilePath; // Path of the file that holds the module's IR.
+ std::string Symbols; // Text of the symbol table that belongs to the module.
+
+ SYCLSplitModule() = default;
+ SYCLSplitModule(const SYCLSplitModule &) = default;
+ SYCLSplitModule &operator=(const SYCLSplitModule &) = default;
+ SYCLSplitModule(SYCLSplitModule &&) = default;
+ SYCLSplitModule &operator=(SYCLSplitModule &&) = default;
+
+ SYCLSplitModule(std::string_view File, std::string Symbols)
+ : ModuleFilePath(File), Symbols(std::move(Symbols)) {}
+};
+
+/// Options that control how splitSYCLModule splits a module and writes the
+/// results.
+struct ModuleSplitterSettings {
+  /// Splitting strategy. Defaults to no splitting so that a default-constructed
+  /// object never carries an indeterminate mode.
+  IRSplitMode Mode = IRSplitMode::IRSM_NONE;
+  bool OutputAssembly = false; // Bitcode or LLVM IR.
+  /// Prefix of the output file names. This is a non-owning reference: the
+  /// referenced string must outlive every use of the settings object.
+  StringRef OutputPrefix;
+};
+
+/// Parses the string table.
+Expected<SmallVector<SYCLSplitModule, 0>>
+parseSYCLSplitModulesFromFile(StringRef File);
+
+/// Splits the given module \p M according to the given \p Settings.
+Expected<SmallVector<SYCLSplitModule, 0>>
+splitSYCLModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings);
+
+} // namespace llvm
+
+#endif // LLVM_SYCL_MODULE_SPLIT_H
diff --git a/llvm/include/llvm/Transforms/Utils/SYCLUtils.h b/llvm/include/llvm/Transforms/Utils/SYCLUtils.h
new file mode 100644
index 0000000000000..53dec1139cd8e
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/SYCLUtils.h
@@ -0,0 +1,26 @@
+//===------------ SYCLUtils.h - SYCL utility functions --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Utility functions for SYCL.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_UTILS_SYCLUTILS_H
+#define LLVM_TRANSFORMS_UTILS_SYCLUTILS_H
+
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+using SYCLStringTable = std::vector<std::vector<std::string>>;
+
+void writeSYCLStringTable(const SYCLStringTable &Table, raw_ostream &OS);
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_SYCLUTILS_H
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 65bd3080662c4..530cba5275dcb 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -82,6 +82,8 @@ add_llvm_component_library(LLVMTransformUtils
SizeOpts.cpp
SplitModule.cpp
StripNonLineTableDebugInfo.cpp
+ SYCLModuleSplit.cpp
+ SYCLUtils.cpp
SymbolRewriter.cpp
UnifyFunctionExitNodes.cpp
UnifyLoopExits.cpp
diff --git a/llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp b/llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp
new file mode 100644
index 0000000000000..e6a36a1fba969
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp
@@ -0,0 +1,513 @@
+//===-------- SYCLModuleSplitter.cpp - split a module into callgraphs -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// See comments in the header.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SYCLModuleSplit.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Bitcode/BitcodeWriterPass.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassManagerImpl.h"
+#include "llvm/IRPrinter/IRPrintingPasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/IPO/GlobalDCE.h"
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
+#include "llvm/Transforms/IPO/StripSymbols.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/SYCLUtils.h"
+
+#include <map>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sycl_module_split"
+
+/// Returns true if \p F uses one of the kernel calling conventions recognized
+/// by the splitter (SPIR kernel or AMDGPU kernel).
+static bool isKernel(const Function &F) {
+  switch (F.getCallingConv()) {
+  case CallingConv::SPIR_KERNEL:
+  case CallingConv::AMDGPU_KERNEL:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Returns true if \p F has to be treated as an entry point of a split module.
+static bool isEntryPoint(const Function &F) {
+  // Only defined kernels qualify. Declarations are rejected so that they never
+  // end up in a group of entry points and, through it, in the generated list
+  // of symbols.
+  return !F.isDeclaration() && isKernel(F);
+}
+
+namespace {
+
+// A vector that contains all entry point functions in a split module.
+using EntryPointSet = SetVector<const Function *>;
+
+/// Represents a named group of entry points.
+struct EntryPointGroup {
+ std::string GroupName; // Name of the group.
+ EntryPointSet Functions; // Entry points that belong to the group.
+
+ EntryPointGroup() = default;
+ EntryPointGroup(const EntryPointGroup &) = default;
+ EntryPointGroup &operator=(const EntryPointGroup &) = default;
+ EntryPointGroup(EntryPointGroup &&) = default;
+ EntryPointGroup &operator=(EntryPointGroup &&) = default;
+
+ EntryPointGroup(StringRef GroupName,
+ EntryPointSet Functions = EntryPointSet())
+ : GroupName(GroupName), Functions(std::move(Functions)) {}
+
+ void dump() const { // Prints the group name and its functions to dbgs().
+ constexpr size_t INDENT = 4;
+ dbgs().indent(INDENT) << "ENTRY POINTS"
+ << " " << GroupName << " {\n";
+ for (const Function *F : Functions)
+ dbgs().indent(INDENT) << " " << F->getName() << "\n";
+
+ dbgs().indent(INDENT) << "}\n";
+ }
+};
+
+/// Annotates an llvm::Module with information necessary to perform and track
+/// the result of device code (llvm::Module instances) splitting:
+/// - entry points group from the module.
+class ModuleDesc {
+ std::unique_ptr<Module> M; // Owned module; asserted non-null on construction.
+ EntryPointGroup EntryPoints; // Group of entry points tracked for the module.
+
+public:
+ ModuleDesc() = delete;
+ ModuleDesc(const ModuleDesc &) = delete;
+ ModuleDesc &operator=(const ModuleDesc &) = delete;
+ ModuleDesc(ModuleDesc &&) = default;
+ ModuleDesc &operator=(ModuleDesc &&) = default;
+
+ ModuleDesc(std::unique_ptr<Module> M,
+ EntryPointGroup EntryPoints = EntryPointGroup())
+ : M(std::move(M)), EntryPoints(std::move(EntryPoints)) {
+ assert(this->M && "Module should be non-empty");
+ }
+
+ const EntryPointSet &entries() const { return EntryPoints.Functions; }
+ const EntryPointGroup &getEntryPointGroup() const { return EntryPoints; }
+ EntryPointSet &entries() { return EntryPoints.Functions; }
+ Module &getModule() { return *M; }
+ const Module &getModule() const { return *M; }
+
+ // Cleans up module IR - removes dead globals, dead debug info and dead
+ // function declarations.
+ void cleanup() {
+ ModuleAnalysisManager MAM;
+ MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
+ ModulePassManager MPM;
+ MPM.addPass(GlobalDCEPass()); // Delete unreachable globals.
+ MPM.addPass(StripDeadDebugInfoPass()); // Remove dead debug info.
+ MPM.addPass(StripDeadPrototypesPass()); // Remove dead func decls.
+ MPM.run(*M, MAM);
+ }
+
+ std::string makeSymbolTable() const { // Entry point names, one per line.
+ SmallString<128> ST;
+ for (const Function *F : EntryPoints.Functions) {
+ ST += F->getName();
+ ST += "\n";
+ }
+
+ return std::string(ST);
+ }
+
+ void dump() const { // Prints the module name and entry points to dbgs().
+ dbgs() << "ModuleDesc[" << M->getName() << "] {\n";
+ EntryPoints.dump();
+ dbgs() << "}\n";
+ }
+};
+
+// Represents "dependency" or "use" graph of global objects (functions and
+// global variables) in a module. It is used during device code split to
+// understand which global variables and functions (other than entry points)
+// should be included into a split module.
+//
+// Nodes of the graph represent LLVM's GlobalObjects, edges "A" -> "B" represent
+// the fact that if "A" is included into a module, then "B" should be included
+// as well.
+//
+// Examples of dependencies which are represented in this graph:
+// - Function FA calls function FB
+// - Function FA uses global variable GA
+// - Global variable GA references (initialized with) function FB
+// - Function FA stores address of a function FB somewhere
+//
+// The following cases are treated as dependencies between global objects:
+// 1. Global object A is used by a global object B in any way (store,
+// bitcast, phi node, call, etc.): "B" -> "A" edge will be added to the
+// graph;
+// 2. function A performs an indirect call of a function with signature S and
+// there is a function B with signature S. "A" -> "B" edge will be added to
+// the graph;
+class DependencyGraph {
+public:
+ using GlobalSet = SmallPtrSet<const GlobalValue *, 16>;
+
+ DependencyGraph(const Module &M) {
+ // Group functions by their signature to handle case (2) described above
+ DenseMap<const FunctionType *, DependencyGraph::GlobalSet>
+ FuncTypeToFuncsMap;
+ for (const auto &F : M.functions()) {
+ // Kernels can't be called (either directly or indirectly) in SYCL
+ if (isKernel(F))
+ continue;
+
+ FuncTypeToFuncsMap[F.getFunctionType()].insert(&F);
+ }
+
+ for (const auto &F : M.functions()) {
+ // case (1), see comment above the class definition
+ for (const Value *U : F.users())
+ addUserToGraphRecursively(cast<const User>(U), &F);
+
+ // case (2), see comment above the class definition
+ for (const auto &I : instructions(F)) {
+ const auto *CI = dyn_cast<CallInst>(&I);
+ if (!CI || !CI->isIndirectCall()) // Direct calls were handled above
+ continue;
+
+ const FunctionType *Signature = CI->getFunctionType();
+ const auto &PotentialCallees = FuncTypeToFuncsMap[Signature];
+ Graph[&F].insert(PotentialCallees.begin(), PotentialCallees.end()); // F depends on every non-kernel function of that signature.
+ }
+ }
+
+ // Global variables: only their users are traced, there is no counterpart
+ // of the indirect call handling done for functions.
+ for (const auto &GV : M.globals())
+ for (const Value *U : GV.users())
+ addUserToGraphRecursively(cast<const User>(U), &GV);
+ }
+
+ iterator_range<GlobalSet::const_iterator>
+ dependencies(const GlobalValue *Val) const { // Globals recorded as dependencies of Val; empty range if none.
+ auto It = Graph.find(Val);
+ return (It == Graph.end())
+ ? make_range(EmptySet.begin(), EmptySet.end())
+ : make_range(It->second.begin(), It->second.end());
+ }
+
+private:
+ void addUserToGraphRecursively(const User *Root, const GlobalValue *V) { // Records V as a dependency of every global reached from Root.
+ SmallVector<const User *, 8> WorkList;
+ WorkList.push_back(Root);
+
+ while (!WorkList.empty()) {
+ const User *U = WorkList.pop_back_val();
+ if (const auto *I = dyn_cast<const Instruction>(U)) {
+ const auto *UFunc = I->getFunction();
+ Graph[UFunc].insert(V); // The function that contains I depends on V.
+ } else if (isa<const Constant>(U)) {
+ if (const auto *GV = dyn_cast<const GlobalVariable>(U))
+ Graph[GV].insert(V); // The global variable GV depends on V.
+ // This could be a global variable or some constant expression (like
+ // bitcast or gep). We trace users of this constant further to reach
+ // global objects they are used by and add them to the graph.
+ for (const auto *UU : U->users())
+ WorkList.push_back(UU);
+ } else
+ llvm_unreachable("Unhandled type of function user");
+ }
+ }
+
+ DenseMap<const GlobalValue *, GlobalSet> Graph; // Maps a global to the globals it depends on.
+ SmallPtrSet<const GlobalValue *, 1> EmptySet; // Returned by dependencies() for globals without an entry in Graph.
+};
+
+/// Fills \p GVs with the global values needed by a split module built for
+/// \p ModuleEntryPoints: the entry points themselves, every non-discardable
+/// global variable of \p M and everything reachable from those through \p DG.
+void collectFunctionsAndGlobalVariablesToExtract(
+    SetVector<const GlobalValue *> &GVs, const Module &M,
+    const EntryPointGroup &ModuleEntryPoints, const DependencyGraph &DG) {
+  // Seed the set with the entry points of the group.
+  for (const Function *EntryPoint : ModuleEntryPoints.Functions)
+    GVs.insert(EntryPoint);
+
+  // Non-discardable global variables are also included in the initial set.
+  for (const GlobalVariable &Var : M.globals()) {
+    if (Var.isDiscardableIfUnused())
+      continue;
+    GVs.insert(&Var);
+  }
+
+  // GVs is a SetVector: inserting a value that is already present is a no-op,
+  // so walking it by index while it grows visits every element exactly once
+  // and terminates without recursion.
+  for (size_t Pos = 0; Pos < GVs.size(); ++Pos) {
+    const GlobalValue *Current = GVs[Pos];
+    for (const GlobalValue *Dep : DG.dependencies(Current)) {
+      // Function declarations are skipped; global variables and function
+      // definitions are added unconditionally.
+      const auto *DepFunc = dyn_cast<const Function>(Dep);
+      if (DepFunc && DepFunc->isDeclaration())
+        continue;
+      GVs.insert(Dep);
+    }
+  }
+}
+
+/// Clones the module of \p MD keeping definitions only for the globals in
+/// \p GVs and returns a descriptor of the clone whose entry points are the
+/// cloned counterparts of \p ModuleEntryPoints.
+ModuleDesc extractSubModule(const ModuleDesc &MD,
+                            const SetVector<const GlobalValue *> &GVs,
+                            EntryPointGroup ModuleEntryPoints) {
+  // Definitions are cloned only for the needed globals. Everything else is
+  // cloned as a declaration and is expected to be removed later.
+  ValueToValueMapTy VMap;
+  auto ShouldCloneDefinition = [&](const GlobalValue *GV) {
+    return GVs.count(GV);
+  };
+  std::unique_ptr<Module> SubM =
+      CloneModule(MD.getModule(), VMap, ShouldCloneDefinition);
+
+  // Make the group refer to the cloned entry points instead of the originals.
+  EntryPointSet ClonedEntryPoints;
+  for (const Function *Original : ModuleEntryPoints.Functions)
+    ClonedEntryPoints.insert(cast<Function>(VMap[Original]));
+  ModuleEntryPoints.Functions = std::move(ClonedEntryPoints);
+
+  return ModuleDesc{std::move(SubM), std::move(ModuleEntryPoints)};
+}
+
+// Produces a copy of the LLVM IR module held by MD that contains the entry
+// points listed in ModuleEntryPoints together with the functions and globals
+// they depend on according to DG. The copy is cleaned up before it is returned.
+ModuleDesc extractCallGraph(const ModuleDesc &MD,
+ EntryPointGroup ModuleEntryPoints,
+ const DependencyGraph &DG) {
+ SetVector<const GlobalValue *> GVs;
+ collectFunctionsAndGlobalVariablesToExtract(GVs, MD.getModule(),
+ ModuleEntryPoints, DG);
+
+ ModuleDesc SplitM = extractSubModule(MD, GVs, std::move(ModuleEntryPoints));
+ LLVM_DEBUG(SplitM.dump());
+ SplitM.cleanup();
+ return SplitM;
+}
+
+using EntryPointGroupVec = SmallVector<EntryPointGroup, 0>;
+
+/// Module Splitter.
+/// It gets a module (in a form of module descriptor, to get additional info)
+/// and a collection of entry point groups. Each group specifies a subset of
+/// entry points from the input module that should be included in a split module.
+class ModuleSplitter {
+private:
+ ModuleDesc Input; // Module that is being split.
+ EntryPointGroupVec Groups; // Groups that have not been turned into splits yet.
+ DependencyGraph DG; // Built from Input's module in the constructor.
+
+private:
+ EntryPointGroup drawEntryPointGroup() {
+ assert(Groups.size() > 0 && "Reached end of entry point groups list.");
+ EntryPointGroup Group = std::move(Groups.back()); // Groups are consumed from the back.
+ Groups.pop_back();
+ return Group;
+ }
+
+public:
+ ModuleSplitter(ModuleDesc MD, EntryPointGroupVec GroupVec)
+ : Input(std::move(MD)), Groups(std::move(GroupVec)),
+ DG(Input.getModule()) {
+ assert(!Groups.empty() && "Entry points groups collection is empty!");
+ }
+
+ /// Gets next subsequence of entry points in an input module and provides
+ /// split submodule containing these entry points and their dependencies.
+ ModuleDesc getNextSplit() {
+ return extractCallGraph(Input, drawEntryPointGroup(), DG);
+ }
+
+ /// Check that there are still submodules to split.
+ bool hasMoreSplits() const { return Groups.size() > 0; }
+};
+
+} // namespace
+
+/// Gets the value of the function attribute \p AttrName attached to \p F if it
+/// is present. Otherwise returns an empty string.
+static StringRef computeFunctionCategoryFromStringMetadata(const Function &F,
+ StringRef AttrName) {
+ return F.getFnAttribute(AttrName).getValueAsString();
+}
+
+/// Partitions the entry points of \p M into groups according to \p Mode.
+///
+/// In IRSM_PER_KERNEL mode every entry point forms its own group named after
+/// the function. In IRSM_PER_TU mode entry points are grouped by the value of
+/// their "sycl-module-id" attribute. If \p M has no entry points, a single
+/// empty group named "-" is returned.
+///
+/// \p Mode must not be IRSM_NONE when \p M contains entry points.
+static EntryPointGroupVec selectEntryPointGroups(const Module &M,
+                                                 IRSplitMode Mode) {
+  // std::map is used here to ensure stable ordering of entry point groups,
+  // which is based on their contents, this greatly helps LIT tests
+  std::map<std::string, EntryPointSet> EntryPointsMap;
+
+  static constexpr char ATTR_SYCL_MODULE_ID[] = "sycl-module-id";
+  for (const auto &F : M.functions()) {
+    if (!isEntryPoint(F))
+      continue;
+
+    std::string Key;
+    switch (Mode) {
+    case IRSplitMode::IRSM_PER_KERNEL:
+      Key = F.getName().str();
+      break;
+    case IRSplitMode::IRSM_PER_TU:
+      Key = computeFunctionCategoryFromStringMetadata(F, ATTR_SYCL_MODULE_ID)
+                .str();
+      break;
+    case IRSplitMode::IRSM_NONE:
+      llvm_unreachable("IRSM_NONE requires no entry point grouping");
+    }
+
+    EntryPointsMap[Key].insert(&F);
+  }
+
+  EntryPointGroupVec Groups;
+  if (EntryPointsMap.empty()) {
+    // No entry points met, record this.
+    Groups.emplace_back("-", EntryPointSet());
+    return Groups;
+  }
+
+  // Emit one group per key, in the (sorted) order of the map.
+  Groups.reserve(EntryPointsMap.size());
+  for (auto &[Key, EntryPoints] : EntryPointsMap)
+    Groups.emplace_back(Key, std::move(EntryPoints));
+
+  return Groups;
+}
+
+/// Writes \p M to the file \p FilePath as textual LLVM IR when
+/// \p OutputAssembly is true and as bitcode otherwise.
+///
+/// \returns an Error if the file cannot be opened or if writing to it fails.
+static Error saveModuleIRInFile(Module &M, StringRef FilePath,
+                                bool OutputAssembly) {
+  int FD = -1;
+  if (std::error_code EC = sys::fs::openFileForWrite(FilePath, FD))
+    return errorCodeToError(EC);
+
+  raw_fd_ostream OS(FD, /*shouldClose=*/true);
+  ModulePassManager MPM;
+  ModuleAnalysisManager MAM;
+  MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
+  if (OutputAssembly)
+    MPM.addPass(PrintModulePass(OS));
+  else
+    MPM.addPass(BitcodeWriterPass(OS));
+
+  MPM.run(M, MAM);
+
+  // Flush and close explicitly: an I/O error that is still pending when the
+  // stream is destroyed is reported as a fatal error instead of being handed
+  // back to the caller.
+  OS.close();
+  if (OS.has_error()) {
+    std::error_code EC = OS.error();
+    OS.clear_error();
+    return errorCodeToError(EC);
+  }
+
+  return Error::success();
+}
+
+/// Saves the module of \p MD to the file "<Prefix>.ll" (when \p OutputAssembly
+/// is true) or "<Prefix>.bc" (otherwise).
+///
+/// \returns the path of the written file together with the symbol table of
+/// \p MD, or the error reported while writing the file.
+static Expected<SYCLSplitModule>
+saveModuleDesc(ModuleDesc &MD, std::string Prefix, bool OutputAssembly) {
+  Prefix += OutputAssembly ? ".ll" : ".bc";
+  // Error is move-only. Moving explicitly keeps the conversion to Expected
+  // valid on compilers that do not implicitly move a returned local whose
+  // type differs from the function's return type.
+  if (Error E = saveModuleIRInFile(MD.getModule(), Prefix, OutputAssembly))
+    return std::move(E);
+
+  SYCLSplitModule SM;
+  SM.ModuleFilePath = std::move(Prefix);
+  SM.Symbols = MD.makeSymbolTable();
+  return SM;
+}
+
+namespace llvm {
+
+/// Reads the SYCL table stored in \p File. The first line must be the header
+/// "[Code|Symbols]"; every following row names an IR file and a symbols file
+/// separated by '|'. The contents of each symbols file are loaded into the
+/// returned description of the corresponding module.
+Expected<SmallVector<SYCLSplitModule, 0>>
+parseSYCLSplitModulesFromFile(StringRef File) {
+  auto TableBufOrErr = llvm::MemoryBuffer::getFile(File);
+  if (!TableBufOrErr)
+    return createFileError(File, TableBufOrErr.getError());
+
+  line_iterator Row(**TableBufOrErr);
+  if (Row.is_at_eof() || *Row != "[Code|Symbols]")
+    return createStringError(inconvertibleErrorCode(),
+                             "invalid SYCL Table file.");
+
+  // "Code" and "Symbols" at the moment.
+  static constexpr int NUMBER_COLUMNS = 2;
+  SmallVector<SYCLSplitModule, 0> Modules;
+  // Skip the header and handle the remaining rows one by one.
+  for (++Row; !Row.is_at_eof(); ++Row) {
+    StringRef Line = *Row;
+    if (Line.empty())
+      return createStringError("invalid SYCL table row.");
+
+    SmallVector<StringRef, NUMBER_COLUMNS> Cells;
+    Line.split(Cells, "|");
+    if (Cells.size() != NUMBER_COLUMNS)
+      return createStringError("invalid SYCL Table row.");
+
+    StringRef IRFilePath = Cells[0];
+    StringRef SymbolsFilePath = Cells[1];
+    if (SymbolsFilePath.empty())
+      return createStringError("invalid SYCL Table row.");
+
+    auto SymbolsBufOrErr = MemoryBuffer::getFile(SymbolsFilePath);
+    if (!SymbolsBufOrErr)
+      return createFileError(SymbolsFilePath, SymbolsBufOrErr.getError());
+
+    // Keep a copy of the whole symbols file next to the IR file path.
+    const MemoryBuffer &SymbolsBuf = **SymbolsBufOrErr;
+    Modules.emplace_back(IRFilePath,
+                         std::string(SymbolsBuf.getBufferStart(),
+                                     SymbolsBuf.getBufferEnd()));
+  }
+
+  return Modules;
+}
+
+/// Maps the spelling of a split mode ("source", "kernel" or "none") to the
+/// matching IRSplitMode value. Any other string yields std::nullopt.
+std::optional<IRSplitMode> convertStringToSplitMode(StringRef S) {
+  if (S == "source")
+    return IRSplitMode::IRSM_PER_TU;
+  if (S == "kernel")
+    return IRSplitMode::IRSM_PER_KERNEL;
+  if (S == "none")
+    return IRSplitMode::IRSM_NONE;
+
+  return std::nullopt;
+}
+
+/// Splits \p M as requested by \p Settings and saves every resulting module to
+/// a file named "<OutputPrefix>_<N>" with the extension chosen by
+/// saveModuleDesc. Returns the descriptions of the saved modules, or the first
+/// error hit while saving.
+Expected<SmallVector<SYCLSplitModule, 0>>
+splitSYCLModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings) {
+  SmallVector<SYCLSplitModule, 0> OutputImages;
+
+  // Without splitting, the input module is saved as the only image.
+  if (Settings.Mode == IRSplitMode::IRSM_NONE) {
+    ModuleDesc Whole = std::move(M);
+    std::string OutIRFileName = (Settings.OutputPrefix + Twine("_0")).str();
+    auto ImageOrErr =
+        saveModuleDesc(Whole, OutIRFileName, Settings.OutputAssembly);
+    if (!ImageOrErr)
+      return ImageOrErr.takeError();
+
+    OutputImages.emplace_back(std::move(*ImageOrErr));
+    return OutputImages;
+  }
+
+  // The groups have to be selected while M still owns the module.
+  EntryPointGroupVec Groups = selectEntryPointGroups(*M, Settings.Mode);
+  ModuleDesc Input = std::move(M);
+  ModuleSplitter Splitter(std::move(Input), std::move(Groups));
+
+  // Save the splits one by one, numbering the output files from zero.
+  for (size_t ID = 0; Splitter.hasMoreSplits(); ++ID) {
+    ModuleDesc Split = Splitter.getNextSplit();
+
+    std::string OutIRFileName = (Settings.OutputPrefix + "_" + Twine(ID)).str();
+    auto SplitImageOrErr =
+        saveModuleDesc(Split, OutIRFileName, Settings.OutputAssembly);
+    if (!SplitImageOrErr)
+      return SplitImageOrErr.takeError();
+
+    OutputImages.emplace_back(std::move(*SplitImageOrErr));
+  }
+
+  return OutputImages;
+}
+
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/SYCLUtils.cpp b/llvm/lib/Transforms/Utils/SYCLUtils.cpp
new file mode 100644
index 0000000000000..7ae94e044bd42
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/SYCLUtils.cpp
@@ -0,0 +1,27 @@
+//===------------ SYCLUtils.cpp - SYCL utility functions ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// SYCL utility functions.
+//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/SYCLUtils.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// Writes \p Table to \p OS. The first row holds the column titles and is
+/// printed as "[title1|title2|...]"; every following row is printed on its own
+/// line with its cells separated by '|'.
+///
+/// \p Table must contain at least the (non-empty) row of column titles and
+/// every other row must have as many cells as there are titles.
+void writeSYCLStringTable(const SYCLStringTable &Table, raw_ostream &OS) {
+  assert(Table.size() > 0 && "table should contain at least column titles");
+  // Only read by the assertions below, hence unused when they are compiled
+  // out.
+  [[maybe_unused]] size_t NumberColumns = Table[0].size();
+  assert(NumberColumns > 0 && "table should be non-empty");
+  OS << '[' << join(Table[0].begin(), Table[0].end(), "|") << "]\n";
+  for (size_t I = 1, E = Table.size(); I != E; ++I) {
+    assert(Table[I].size() == NumberColumns && "row's size should be equal");
+    OS << join(Table[I].begin(), Table[I].end(), "|") << '\n';
+  }
+}
+
+} // namespace llvm
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/amd-kernel-split.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/amd-kernel-split.ll
new file mode 100644
index 0000000000000..6b0305d12400f
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/amd-kernel-split.ll
@@ -0,0 +1,17 @@
+; -- Per-kernel split
+; RUN: llvm-split -sycl-split=kernel -S < %s -o %tC
+; RUN: FileCheck %s -input-file=%tC_0.ll --check-prefixes CHECK-A0
+; RUN: FileCheck %s -input-file=%tC_1.ll --check-prefixes CHECK-A1
+
+define dso_local amdgpu_kernel void @Kernel1() {
+ ret void
+}
+
+define dso_local amdgpu_kernel void @Kernel2() {
+ ret void
+}
+
+; CHECK-A0: define dso_local amdgpu_kernel void @Kernel2()
+; CHECK-A0-NOT: define dso_local amdgpu_kernel void @Kernel1()
+; CHECK-A1-NOT: define dso_local amdgpu_kernel void @Kernel2()
+; CHECK-A1: define dso_local amdgpu_kernel void @Kernel1()
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-1.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-1.ll
new file mode 100644
index 0000000000000..3734153b9fbaa
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-1.ll
@@ -0,0 +1,120 @@
+; RUN: llvm-split -sycl-split=source -S < %s -o %t
+; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK
+; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK
+; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-TXT
+; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-TU1-TXT
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64-unknown-linux"
+
+$_Z3barIiET_S0_ = comdat any
+
+; CHECK-TU1-NOT: @{{.*}}GV{{.*}}
+; CHECK-TU0: @{{.*}}GV{{.*}} = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+
+; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
+; CHECK-TU1-TXT: {{.*}}TU0_kernel0{{.*}}
+; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
+; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel0{{.*}}
+
+; CHECK-TU1: call spir_func void @{{.*}}foo{{.*}}()
+
+define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel0() #0 {
+entry:
+ call spir_func void @_Z3foov()
+ ret void
+}
+
+; CHECK-TU1: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
+; CHECK-TU0-NOT: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
+
+; CHECK-TU1: call spir_func i32 @{{.*}}bar{{.*}}(i32 1)
+
+define dso_local spir_func void @_Z3foov() {
+entry:
+ %a = alloca i32, align 4
+ %call = call spir_func i32 @_Z3barIiET_S0_(i32 1)
+ %add = add nsw i32 2, %call
+ store i32 %add, ptr %a, align 4
+ ret void
+}
+
+; CHECK-TU1: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
+; CHECK-TU0-NOT: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
+
+; Function Attrs: nounwind
+define linkonce_odr dso_local spir_func i32 @_Z3barIiET_S0_(i32 %arg) comdat {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, ptr %arg.addr, align 4
+ %0 = load i32, ptr %arg.addr, align 4
+ ret i32 %0
+}
+
+; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
+; CHECK-TU1-TXT: {{.*}}TU0_kernel1{{.*}}
+; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
+; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel1{{.*}}
+
+; CHECK-TU1: call spir_func void @{{.*}}foo1{{.*}}()
+
+define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel1() #0 {
+entry:
+ call spir_func void @_Z4foo1v()
+ ret void
+}
+
+; CHECK-TU1: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
+; CHECK-TU0-NOT: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
+
+; Function Attrs: nounwind
+define dso_local spir_func void @_Z4foo1v() {
+entry:
+ %a = alloca i32, align 4
+ store i32 2, ptr %a, align 4
+ ret void
+}
+
+; CHECK-TU1-NOT: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
+; CHECK-TU1-TXT-NOT: {{.*}}TU1_kernel{{.*}}
+; CHECK-TU0: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
+; CHECK-TU0-TXT: {{.*}}TU1_kernel{{.*}}
+
+; CHECK-TU0: call spir_func void @{{.*}}foo2{{.*}}()
+
+define dso_local spir_kernel void @_ZTSZ4mainE10TU1_kernel() #1 {
+entry:
+ call spir_func void @_Z4foo2v()
+ ret void
+}
+
+; CHECK-TU1-NOT: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
+; CHECK-TU0: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
+
+; Function Attrs: nounwind
+define dso_local spir_func void @_Z4foo2v() {
+entry:
+ %a = alloca i32, align 4
+; CHECK-TU0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}}GV{{.*}} to ptr addrspace(4)), align 4
+ %0 = load i32, ptr addrspace(4) getelementptr inbounds ([1 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZL2GV to ptr addrspace(4)), i64 0, i64 0), align 4
+ %add = add nsw i32 4, %0
+ store i32 %add, ptr %a, align 4
+ ret void
+}
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
+
+; Metadata is saved in both modules.
+; CHECK: !opencl.spir.version = !{!0, !0}
+; CHECK: !spirv.Source = !{!1, !1}
+
+!opencl.spir.version = !{!0, !0}
+!spirv.Source = !{!1, !1}
+
+; CHECK: !0 = !{i32 1, i32 2}
+; CHECK: !1 = !{i32 4, i32 100000}
+
+!0 = !{i32 1, i32 2}
+!1 = !{i32 4, i32 100000}
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-func-ptr.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-func-ptr.ll
new file mode 100644
index 0000000000000..2e3d2e5e55c9b
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-func-ptr.ll
@@ -0,0 +1,50 @@
+; This test checks that we can properly perform device code split by tracking
+; all uses of functions (not only direct calls)
+
+; RUN: llvm-split -sycl-split=source -S < %s -o %t
+; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix=CHECK-SYM0
+; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix=CHECK-SYM1
+; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix=CHECK-IR0
+; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix=CHECK-IR1
+
+; CHECK-SYM0: kernel2
+; CHECK-SYM1: kernel1
+;
+; CHECK-IR0: define dso_local spir_kernel void @kernel2
+;
+; CHECK-IR1: @_Z2f1iTable = weak global ptr @_Z2f1i
+; CHECK-IR1: define {{.*}} i32 @_Z2f1i
+; CHECK-IR1: define weak_odr dso_local spir_kernel void @kernel1
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spirv64-unknown-unknown"
+
+@_Z2f1iTable = weak global ptr @_Z2f1i, align 8
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
+define dso_local spir_func i32 @_Z2f1i(i32 %a) #0 {
+entry:
+ ret i32 %a
+}
+
+; Function Attrs: convergent norecurse
+define weak_odr dso_local spir_kernel void @kernel1() #1 {
+entry:
+ %0 = call i32 @indirect_call(ptr addrspace(4) addrspacecast ( ptr getelementptr inbounds ( [1 x ptr] , ptr @_Z2f1iTable, i64 0, i64 0) to ptr addrspace(4)), i32 0)
+ ret void
+}
+
+; Function Attrs: convergent norecurse
+define dso_local spir_kernel void @kernel2() #2 {
+entry:
+ ret void
+}
+
+declare dso_local spir_func i32 @indirect_call(ptr addrspace(4), i32) local_unnamed_addr
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn }
+attributes #1 = { convergent norecurse "sycl-module-id"="TU1.cpp" }
+attributes #2 = { convergent norecurse "sycl-module-id"="TU2.cpp" }
+
+; CHECK: kernel1
+; CHECK: kernel2
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/basic-module-split.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/basic-module-split.ll
new file mode 100644
index 0000000000000..a916fdfa82b76
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/basic-module-split.ll
@@ -0,0 +1,122 @@
+; RUN: llvm-split -sycl-split=source -S < %s -o %t
+; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK
+; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK
+; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-TXT
+; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-TU1-TXT
+
+; ModuleID = 'basic-module-split.ll'
+source_filename = "basic-module-split.ll"
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64-unknown-linux"
+
+$_Z3barIiET_S0_ = comdat any
+
+;CHECK-TU1-NOT: @{{.*}}GV{{.*}}
+;CHECK-TU0: @{{.*}}GV{{.*}} = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+
+; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
+; CHECK-TU1-TXT: {{.*}}TU0_kernel0{{.*}}
+; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
+; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel0{{.*}}
+
+; CHECK-TU1: call spir_func void @{{.*}}foo{{.*}}()
+
+define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel0() #0 {
+entry:
+ call spir_func void @_Z3foov()
+ ret void
+}
+
+; CHECK-TU1: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
+; CHECK-TU0-NOT: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
+
+; CHECK-TU1: call spir_func i32 @{{.*}}bar{{.*}}(i32 1)
+
+define dso_local spir_func void @_Z3foov() {
+entry:
+ %a = alloca i32, align 4
+ %call = call spir_func i32 @_Z3barIiET_S0_(i32 1)
+ %add = add nsw i32 2, %call
+ store i32 %add, ptr %a, align 4
+ ret void
+}
+
+; CHECK-TU1: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
+; CHECK-TU0-NOT: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
+
+; Function Attrs: nounwind
+define linkonce_odr dso_local spir_func i32 @_Z3barIiET_S0_(i32 %arg) comdat {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, ptr %arg.addr, align 4
+ %0 = load i32, ptr %arg.addr, align 4
+ ret i32 %0
+}
+
+; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
+; CHECK-TU1-TXT: {{.*}}TU0_kernel1{{.*}}
+; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
+; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel1{{.*}}
+
+; CHECK-TU1: call spir_func void @{{.*}}foo1{{.*}}()
+
+define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel1() #0 {
+entry:
+ call spir_func void @_Z4foo1v()
+ ret void
+}
+
+; CHECK-TU1: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
+; CHECK-TU0-NOT: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
+
+; Function Attrs: nounwind
+define dso_local spir_func void @_Z4foo1v() {
+entry:
+ %a = alloca i32, align 4
+ store i32 2, ptr %a, align 4
+ ret void
+}
+
+; CHECK-TU1-NOT: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
+; CHECK-TU1-TXT-NOT: {{.*}}TU1_kernel{{.*}}
+; CHECK-TU0: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
+; CHECK-TU0-TXT: {{.*}}TU1_kernel{{.*}}
+
+; CHECK-TU0: call spir_func void @{{.*}}foo2{{.*}}()
+
+define dso_local spir_kernel void @_ZTSZ4mainE10TU1_kernel() #1 {
+entry:
+ call spir_func void @_Z4foo2v()
+ ret void
+}
+
+; CHECK-TU1-NOT: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
+; CHECK-TU0: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
+
+; Function Attrs: nounwind
+define dso_local spir_func void @_Z4foo2v() {
+entry:
+ %a = alloca i32, align 4
+; CHECK-TU0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}}GV{{.*}} to ptr addrspace(4)), align 4
+ %0 = load i32, ptr addrspace(4) getelementptr inbounds ([1 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZL2GV to ptr addrspace(4)), i64 0, i64 0), align 4
+ %add = add nsw i32 4, %0
+ store i32 %add, ptr %a, align 4
+ ret void
+}
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
+
+; Metadata is saved in both modules.
+; CHECK: !opencl.spir.version = !{!0, !0}
+; CHECK: !spirv.Source = !{!1, !1}
+
+!opencl.spir.version = !{!0, !0}
+!spirv.Source = !{!1, !1}
+
+; CHECK: !0 = !{i32 1, i32 2}
+; CHECK: !1 = !{i32 4, i32 100000}
+
+!0 = !{i32 1, i32 2}
+!1 = !{i32 4, i32 100000}
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
new file mode 100644
index 0000000000000..1e92034c156bf
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
@@ -0,0 +1,67 @@
+; Check that Module splitting can trace through more complex call stacks
+; involving several nested indirect calls.
+
+; RUN: llvm-split -sycl-split=kernel -S < %s -o %t
+; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \
+; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \
+; RUN: --implicit-check-not @kernel_B
+; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix CHECK1 \
+; RUN: --implicit-check-not @kernel_A --implicit-check-not @kernel_C
+; RUN: FileCheck %s -input-file=%t_2.ll --check-prefix CHECK2 \
+; RUN: --implicit-check-not @foo --implicit-check-not @bar \
+; RUN: --implicit-check-not @BAZ --implicit-check-not @kernel_B \
+; RUN: --implicit-check-not @kernel_C
+
+; CHECK0-DAG: define spir_kernel void @kernel_C
+; CHECK0-DAG: define spir_func i32 @bar
+; CHECK0-DAG: define spir_func void @baz
+; CHECK0-DAG: define spir_func void @BAZ
+
+; CHECK1-DAG: define spir_kernel void @kernel_B
+; CHECK1-DAG: define {{.*}}spir_func i32 @foo
+; CHECK1-DAG: define spir_func i32 @bar
+; CHECK1-DAG: define spir_func void @baz
+; CHECK1-DAG: define spir_func void @BAZ
+
+; CHECK2-DAG: define spir_kernel void @kernel_A
+; CHECK2-DAG: define {{.*}}spir_func void @baz
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+define spir_func i32 @foo(i32 (i32, void ()*)* %ptr1, void ()* %ptr2) {
+ %1 = call spir_func i32 %ptr1(i32 42, void ()* %ptr2)
+ ret i32 %1
+}
+
+define spir_func i32 @bar(i32 %arg, void ()* %ptr) {
+ call spir_func void %ptr()
+ ret i32 %arg
+}
+
+define spir_func void @baz() {
+ ret void
+}
+
+define spir_func void @BAZ() {
+ ret void
+}
+
+define spir_kernel void @kernel_A() #0 {
+ call spir_func void @baz()
+ ret void
+}
+
+define spir_kernel void @kernel_B() #1 {
+ call spir_func i32 @foo(i32 (i32, void ()*)* null, void ()* null)
+ ret void
+}
+
+define spir_kernel void @kernel_C() #2 {
+ call spir_func i32 @bar(i32 42, void ()* null)
+ ret void
+}
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
+attributes #2 = { "sycl-module-id"="TU3.cpp" }
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll
new file mode 100644
index 0000000000000..ddb0ea0b3c59a
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll
@@ -0,0 +1,135 @@
+; Test checks "kernel" splitting mode.
+
+; RUN: llvm-split -sycl-split=kernel -S < %s -o %t.files
+; RUN: FileCheck %s -input-file=%t.files_0.ll --check-prefixes CHECK-MODULE0,CHECK
+; RUN: FileCheck %s -input-file=%t.files_0.sym --check-prefixes CHECK-MODULE0-TXT
+; RUN: FileCheck %s -input-file=%t.files_1.ll --check-prefixes CHECK-MODULE1,CHECK
+; RUN: FileCheck %s -input-file=%t.files_1.sym --check-prefixes CHECK-MODULE1-TXT
+; RUN: FileCheck %s -input-file=%t.files_2.ll --check-prefixes CHECK-MODULE2,CHECK
+; RUN: FileCheck %s -input-file=%t.files_2.sym --check-prefixes CHECK-MODULE2-TXT
+
+; ModuleID = 'one-kernel-per-module.ll'
+source_filename = "one-kernel-per-module.ll"
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spirv64-unknown-unknown"
+
+$_Z3barIiET_S0_ = comdat any
+
+;CHECK-MODULE2-NOT: @{{.*}}GV{{.*}}
+;CHECK-MODULE1-NOT: @{{.*}}GV{{.*}}
+;CHECK-MODULE0: @{{.*}}GV{{.*}} = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+
+; CHECK-MODULE2: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
+; CHECK-MODULE2-TXT: {{.*}}TU0_kernel0{{.*}}
+; CHECK-MODULE1-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
+; CHECK-MODULE1-TXT-NOT: {{.*}}TU0_kernel0{{.*}}
+
+; CHECK-MODULE2: call spir_func void @{{.*}}foo{{.*}}()
+
+define dso_local spir_kernel void @TU0_kernel0() #0 {
+entry:
+ call spir_func void @_Z3foov()
+ ret void
+}
+
+; CHECK-MODULE2: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
+; CHECK-MODULE1-NOT: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
+; CHECK-MODULE0-NOT: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
+
+; CHECK-MODULE2: call spir_func i32 @{{.*}}bar{{.*}}(i32 1)
+
+define dso_local spir_func void @_Z3foov() {
+entry:
+ %a = alloca i32, align 4
+ %call = call spir_func i32 @_Z3barIiET_S0_(i32 1)
+ %add = add nsw i32 2, %call
+ store i32 %add, ptr %a, align 4
+ ret void
+}
+
+; CHECK-MODULE2: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
+; CHECK-MODULE1-NOT: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
+; CHECK-MODULE0-NOT: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
+
+; Function Attrs: nounwind
+define linkonce_odr dso_local spir_func i32 @_Z3barIiET_S0_(i32 %arg) comdat {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, ptr %arg.addr, align 4
+ %0 = load i32, ptr %arg.addr, align 4
+ ret i32 %0
+}
+
+; CHECK-MODULE2-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
+; CHECK-MODULE2-TXT-NOT: {{.*}}TU0_kernel1{{.*}}
+; CHECK-MODULE1: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
+; CHECK-MODULE1-TXT: {{.*}}TU0_kernel1{{.*}}
+; CHECK-MODULE0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
+; CHECK-MODULE0-TXT-NOT: {{.*}}TU0_kernel1{{.*}}
+
+; CHECK-MODULE1: call spir_func void @{{.*}}foo1{{.*}}()
+
+define dso_local spir_kernel void @TU0_kernel1() #0 {
+entry:
+ call spir_func void @_Z4foo1v()
+ ret void
+}
+
+; CHECK-MODULE2-NOT: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
+; CHECK-MODULE1: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
+; CHECK-MODULE0-NOT: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
+
+; Function Attrs: nounwind
+define dso_local spir_func void @_Z4foo1v() {
+entry:
+ %a = alloca i32, align 4
+ store i32 2, ptr %a, align 4
+ ret void
+}
+
+; CHECK-MODULE2-NOT: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
+; CHECK-MODULE2-TXT-NOT: {{.*}}TU1_kernel{{.*}}
+; CHECK-MODULE1-NOT: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
+; CHECK-MODULE1-TXT-NOT: {{.*}}TU1_kernel{{.*}}
+; CHECK-MODULE0: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
+; CHECK-MODULE0-TXT: {{.*}}TU1_kernel{{.*}}
+
+; CHECK-MODULE0: call spir_func void @{{.*}}foo2{{.*}}()
+
+define dso_local spir_kernel void @TU1_kernel() #1 {
+entry:
+ call spir_func void @_Z4foo2v()
+ ret void
+}
+
+; CHECK-MODULE2-NOT: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
+; CHECK-MODULE1-NOT: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
+; CHECK-MODULE0: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
+
+; Function Attrs: nounwind
+define dso_local spir_func void @_Z4foo2v() {
+entry:
+ %a = alloca i32, align 4
+; CHECK-MODULE0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}}GV{{.*}} to ptr addrspace(4)), align 4
+ %0 = load i32, ptr addrspace(4) getelementptr inbounds ([1 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZL2GV to ptr addrspace(4)), i64 0, i64 0), align 4
+ %add = add nsw i32 4, %0
+ store i32 %add, ptr %a, align 4
+ ret void
+}
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
+
+; Metadata is saved in both modules.
+; CHECK: !opencl.spir.version = !{!0, !0}
+; CHECK: !spirv.Source = !{!1, !1}
+
+!opencl.spir.version = !{!0, !0}
+!spirv.Source = !{!1, !1}
+
+; CHECK: !0 = !{i32 1, i32 2}
+; CHECK: !1 = !{i32 4, i32 100000}
+
+!0 = !{i32 1, i32 2}
+!1 = !{i32 4, i32 100000}
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-sub-group-size-split-1.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-sub-group-size-split-1.ll
new file mode 100644
index 0000000000000..921b7c22fc365
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-sub-group-size-split-1.ll
@@ -0,0 +1,105 @@
+; The test checks that Module splitting correctly separates kernels
+; that use reqd_sub_group_size attributes from kernels that don't use them
+; regardless of device code split mode
+
+; This test emulates two translation units with 3 kernels:
+; TU0_kernel0 - 1st translation unit, no reqd_sub_group_size attribute used
+; TU0_kernel1 - 1st translation unit, reqd_sub_group_size attribute is used
+; TU1_kernel2 - 2nd translation unit, no reqd_sub_group_size attribute used
+
+; RUN: llvm-split -sycl-split=kernel -S %s -o %t
+; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \
+; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
+; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \
+; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2
+; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \
+; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2
+; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \
+; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
+; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \
+; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2
+; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \
+; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2
+
+; Regardless of device code split mode, each kernel should go into a separate
+; device image
+
+; CHECK-M2-IR: define {{.*}} @TU0_kernel0
+; CHECK-M2-SYMS: TU0_kernel0
+
+; CHECK-M1-IR: define {{.*}} @TU0_kernel1
+; CHECK-M1-SYMS: TU0_kernel1
+
+; CHECK-M0-IR: define {{.*}} @TU1_kernel2
+; CHECK-M0-SYMS: TU1_kernel2
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64-unknown-linux"
+
+; FIXME: device globals should also be properly distributed across device images
+; if they are of optional type
+@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+
+define dso_local spir_kernel void @TU0_kernel0() #0 {
+entry:
+ call spir_func void @foo()
+ ret void
+}
+
+define dso_local spir_func void @foo() {
+entry:
+ %a = alloca i32, align 4
+ %call = call spir_func i32 @bar(i32 1)
+ %add = add nsw i32 2, %call
+ store i32 %add, i32* %a, align 4
+ ret void
+}
+
+; Function Attrs: nounwind
+define linkonce_odr dso_local spir_func i32 @bar(i32 %arg) {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, i32* %arg.addr, align 4
+ %0 = load i32, i32* %arg.addr, align 4
+ ret i32 %0
+}
+
+define dso_local spir_kernel void @TU0_kernel1() #0 !intel_reqd_sub_group_size !2 {
+entry:
+ call spir_func void @foo1()
+ ret void
+}
+
+; Function Attrs: nounwind
+define dso_local spir_func void @foo1() {
+entry:
+ %a = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ ret void
+}
+
+define dso_local spir_kernel void @TU1_kernel2() #1 {
+entry:
+ call spir_func void @foo2()
+ ret void
+}
+
+; Function Attrs: nounwind
+define dso_local spir_func void @foo2() {
+entry:
+ %a = alloca i32, align 4
+ %0 = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @_ZL2GV to [1 x i32] addrspace(4)*), i64 0, i64 0), align 4
+ %add = add nsw i32 4, %0
+ store i32 %add, i32* %a, align 4
+ ret void
+}
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
+
+!opencl.spir.version = !{!0, !0}
+!spirv.Source = !{!1, !1}
+
+!0 = !{i32 1, i32 2}
+!1 = !{i32 4, i32 100000}
+!2 = !{i32 32}
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-wg-size-split-1.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-wg-size-split-1.ll
new file mode 100644
index 0000000000000..2ca8b220edfbe
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-wg-size-split-1.ll
@@ -0,0 +1,105 @@
+; The test checks that Module splitting correctly separates kernels
+; that use reqd_work_group_size attributes from kernels that don't use them
+; regardless of device code split mode
+
+; This test emulates two translation units with 3 kernels:
+; TU0_kernel0 - 1st translation unit, no reqd_work_group_size attribute used
+; TU0_kernel1 - 1st translation unit, reqd_work_group_size attribute is used
+; TU1_kernel2 - 2nd translation unit, no reqd_work_group_size attribute used
+
+; RUN: llvm-split -sycl-split=kernel -S < %s -o %t
+; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \
+; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
+; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \
+; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2
+; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \
+; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2
+; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \
+; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
+; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \
+; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2
+; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \
+; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2
+
+; Regardless of device code split mode, each kernel should go into a separate
+; device image
+
+; CHECK-M2-IR: define {{.*}} @TU0_kernel0
+; CHECK-M2-SYMS: TU0_kernel0
+
+; CHECK-M1-IR: define {{.*}} @TU0_kernel1
+; CHECK-M1-SYMS: TU0_kernel1
+
+; CHECK-M0-IR: define {{.*}} @TU1_kernel2
+; CHECK-M0-SYMS: TU1_kernel2
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64-unknown-linux"
+
+; FIXME: device globals should also be properly distributed across device images
+; if they are of optional type
+@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+
+define dso_local spir_kernel void @TU0_kernel0() #0 {
+entry:
+ call spir_func void @foo()
+ ret void
+}
+
+define dso_local spir_func void @foo() {
+entry:
+ %a = alloca i32, align 4
+ %call = call spir_func i32 @bar(i32 1)
+ %add = add nsw i32 2, %call
+ store i32 %add, i32* %a, align 4
+ ret void
+}
+
+; Function Attrs: nounwind
+define linkonce_odr dso_local spir_func i32 @bar(i32 %arg) {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, i32* %arg.addr, align 4
+ %0 = load i32, i32* %arg.addr, align 4
+ ret i32 %0
+}
+
+define dso_local spir_kernel void @TU0_kernel1() #0 !reqd_work_group_size !2 {
+entry:
+ call spir_func void @foo1()
+ ret void
+}
+
+; Function Attrs: nounwind
+define dso_local spir_func void @foo1() {
+entry:
+ %a = alloca i32, align 4
+ store i32 2, i32* %a, align 4
+ ret void
+}
+
+define dso_local spir_kernel void @TU1_kernel2() #1 {
+entry:
+ call spir_func void @foo2()
+ ret void
+}
+
+; Function Attrs: nounwind
+define dso_local spir_func void @foo2() {
+entry:
+ %a = alloca i32, align 4
+ %0 = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @_ZL2GV to [1 x i32] addrspace(4)*), i64 0, i64 0), align 4
+ %add = add nsw i32 4, %0
+ store i32 %add, i32* %a, align 4
+ ret void
+}
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
+
+!opencl.spir.version = !{!0, !0}
+!spirv.Source = !{!1, !1}
+
+!0 = !{i32 1, i32 2}
+!1 = !{i32 4, i32 100000}
+!2 = !{i32 32}
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/split-with-kernel-declarations.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/split-with-kernel-declarations.ll
new file mode 100644
index 0000000000000..4ba15ecdefea6
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/split-with-kernel-declarations.ll
@@ -0,0 +1,53 @@
+; The test checks that Module splitting does not treat declarations as entry points.
+
+; RUN: llvm-split -sycl-split=kernel -S < %s -o %t2
+; RUN: FileCheck %s -input-file=%t2.table --check-prefix CHECK-PER-KERNEL-TABLE
+; RUN: FileCheck %s -input-file=%t2_0.sym --check-prefix CHECK-PER-KERNEL-SYM1
+; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefix CHECK-PER-KERNEL-SYM2
+; RUN: FileCheck %s -input-file=%t2_2.sym --check-prefix CHECK-PER-KERNEL-SYM0
+
+; With per-kernel split, there should be three device images
+; CHECK-PER-KERNEL-TABLE: [Code|Symbols]
+; CHECK-PER-KERNEL-TABLE: {{.*}}_0.ll|{{.*}}_0.sym
+; CHECK-PER-KERNEL-TABLE-NEXT: {{.*}}_1.ll|{{.*}}_1.sym
+; CHECK-PER-KERNEL-TABLE-NEXT: {{.*}}_2.ll|{{.*}}_2.sym
+; CHECK-PER-KERNEL-TABLE-EMPTY:
+;
+; CHECK-PER-KERNEL-SYM0-NOT: _ZTS4mainE10TU1_kernel1
+; CHECK-PER-KERNEL-SYM0: _ZTSZ4mainE10TU1_kernel0
+; CHECK-PER-KERNEL-SYM0-EMPTY:
+;
+; CHECK-PER-KERNEL-SYM2-NOT: _ZTS4mainE10TU1_kernel1
+; CHECK-PER-KERNEL-SYM2: _ZTSZ4mainE11TU0_kernel0
+; CHECK-PER-KERNEL-SYM2-EMPTY:
+;
+; CHECK-PER-KERNEL-SYM1-NOT: _ZTS4mainE10TU1_kernel1
+; CHECK-PER-KERNEL-SYM1: _ZTSZ4mainE11TU0_kernel1
+; CHECK-PER-KERNEL-SYM1-EMPTY:
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64-unknown-linux"
+
+define spir_kernel void @_ZTSZ4mainE11TU0_kernel0() #0 {
+entry:
+ ret void
+}
+
+define spir_kernel void @_ZTSZ4mainE11TU0_kernel1() #0 {
+entry:
+ ret void
+}
+
+define spir_kernel void @_ZTSZ4mainE10TU1_kernel0() #1 {
+ ret void
+}
+
+declare spir_kernel void @_ZTS4mainE10TU1_kernel1() #1
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
+
+!opencl.spir.version = !{!0, !0}
+!spirv.Source = !{!1, !1}
+!0 = !{i32 1, i32 2}
+!1 = !{i32 4, i32 100000}
diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp
index c456403e6bc68..e40bf71026472 100644
--- a/llvm/tools/llvm-split/llvm-split.cpp
+++ b/llvm/tools/llvm-split/llvm-split.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
@@ -27,8 +28,13 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/SYCLModuleSplit.h"
+#include "llvm/Transforms/Utils/SYCLUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
+#include <vector>
+#include <string>
+
using namespace llvm;
static cl::OptionCategory SplitCategory("Split Options");
@@ -70,6 +76,64 @@ static cl::opt<std::string>
MCPU("mcpu", cl::desc("Target CPU, ignored if -mtriple is not used"),
cl::value_desc("cpu"), cl::cat(SplitCategory));
+// -sycl-split=<source|kernel>: selects the SYCL module splitting mode.
+// The option is optional and defaults to IRSM_NONE; main() only takes the
+// SYCL splitting path when a mode other than IRSM_NONE has been selected.
+cl::opt<IRSplitMode> SYCLSplitMode(
+    "sycl-split", cl::desc("module split mode"), cl::Optional,
+    cl::init(IRSplitMode::IRSM_NONE),
+    cl::values(clEnumValN(IRSplitMode::IRSM_PER_TU, "source",
+                          "1 output module per translation unit"),
+               clEnumValN(IRSplitMode::IRSM_PER_KERNEL, "kernel",
+                          "1 output module per kernel")),
+    cl::cat(SplitCategory));
+
+// -S: emit textual LLVM assembly. The value is copied into
+// ModuleSplitterSettings::OutputAssembly by runSYCLSplitModule().
+cl::opt<bool> OutputAssembly("S", cl::desc("Write output as LLVM assembly"),
+                             cl::cat(SplitCategory));
+
+/// Writes \p Content followed by a newline to the file at \p Path.
+///
+/// If the file cannot be opened, a diagnostic naming \p Path is printed to
+/// errs() and the process exits with status 1.
+void writeStringToFile(std::string_view Content, StringRef Path) {
+  std::error_code OpenError;
+  raw_fd_ostream Out(Path, OpenError);
+  if (!OpenError) {
+    Out << Content << "\n";
+    return;
+  }
+
+  // Opening failed: report the path and terminate.
+  errs() << formatv("error opening file: {0}\n", Path);
+  exit(1);
+}
+
+/// Writes the symbols of every split module to "<Path>_<N>.sym" (N is the
+/// module's index in \p SplitModules) and then writes a two-column
+/// "Code"/"Symbols" table to "<Path>.table". Each table row pairs a module's
+/// file path with the path of its symbols file.
+///
+/// If the table file cannot be opened, a diagnostic naming that file is
+/// printed to errs() and the process exits with status 1.
+void writeSplitModulesAsTable(ArrayRef<SYCLSplitModule> SplitModules,
+                              StringRef Path) {
+  std::vector<std::string> Columns = {"Code", "Symbols"};
+  SYCLStringTable Table;
+  Table.emplace_back(std::move(Columns));
+  for (const auto &[I, SM] : enumerate(SplitModules)) {
+    std::string SymbolsFile = (Twine(Path) + "_" + Twine(I) + ".sym").str();
+    writeStringToFile(SM.Symbols, SymbolsFile);
+    std::vector<std::string> Row = {SM.ModuleFilePath, SymbolsFile};
+    Table.emplace_back(std::move(Row));
+  }
+
+  // Build the table path once so that the file we open and the file named in
+  // the diagnostic are always the same.
+  const std::string TablePath = (Path + ".table").str();
+  std::error_code EC;
+  raw_fd_ostream OS(TablePath, EC);
+  if (EC) {
+    errs() << formatv("error opening file: {0}\n", TablePath);
+    exit(1);
+  }
+
+  writeSYCLStringTable(Table, OS);
+}
+
+/// Splits \p M with splitSYCLModule() using the settings taken from the
+/// command line (-sycl-split, -S and the output filename used as the output
+/// prefix), then records the resulting modules via
+/// writeSplitModulesAsTable().
+///
+/// \returns the error reported by splitSYCLModule(), or success.
+Error runSYCLSplitModule(std::unique_ptr<Module> M) {
+  ModuleSplitterSettings SplitSettings;
+  SplitSettings.Mode = SYCLSplitMode;
+  SplitSettings.OutputAssembly = OutputAssembly;
+  SplitSettings.OutputPrefix = OutputFilename;
+
+  auto SplitResult = splitSYCLModule(std::move(M), SplitSettings);
+  if (Error SplitError = SplitResult.takeError())
+    return SplitError;
+
+  writeSplitModulesAsTable(*SplitResult, OutputFilename);
+  return Error::success();
+}
+
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
@@ -123,6 +187,16 @@ int main(int argc, char **argv) {
Out->keep();
};
+  if (SYCLSplitMode != IRSplitMode::IRSM_NONE) {
+    if (Error E = runSYCLSplitModule(std::move(M))) {
+      // Streaming an Error only logs it; toString() also consumes it, so the
+      // failure value is not destroyed unhandled.
+      errs() << toString(std::move(E)) << "\n";
+      Err.print(argv[0], errs());
+      // Report the failure through the exit status instead of returning 0.
+      return 1;
+    }
+
+    return 0;
+  }
+
if (TM) {
if (PreserveLocals) {
errs() << "warning: -preserve-locals has no effect when using "
>From 115cf87a684c82b30fb5fe42fd7ed8557ddbf7c6 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Tue, 14 Jan 2025 04:42:33 -0800
Subject: [PATCH 02/14] Address code review feedback.
---
llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp | 11 ++++++++---
llvm/tools/llvm-split/llvm-split.cpp | 4 ++--
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp b/llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp
index e6a36a1fba969..157d034858f5f 100644
--- a/llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp
+++ b/llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PassManagerImpl.h"
#include "llvm/IRPrinter/IRPrintingPasses.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
@@ -37,7 +38,7 @@
using namespace llvm;
-#define DEBUG_TYPE "sycl_module_split"
+#define DEBUG_TYPE "sycl-module-split"
static bool isKernel(const Function &F) {
return F.getCallingConv() == CallingConv::SPIR_KERNEL ||
@@ -75,7 +76,8 @@ struct EntryPointGroup {
EntryPointSet Functions = EntryPointSet())
: GroupName(GroupName), Functions(std::move(Functions)) {}
- void dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
constexpr size_t INDENT = 4;
dbgs().indent(INDENT) << "ENTRY POINTS"
<< " " << GroupName << " {\n";
@@ -84,6 +86,7 @@ struct EntryPointGroup {
dbgs().indent(INDENT) << "}\n";
}
+#endif
};
/// Annotates an llvm::Module with information necessary to perform and track
@@ -133,11 +136,13 @@ class ModuleDesc {
return std::string(ST);
}
- void dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
dbgs() << "ModuleDesc[" << M->getName() << "] {\n";
EntryPoints.dump();
dbgs() << "}\n";
}
+#endif
};
// Represents "dependency" or "use" graph of global objects (functions and
diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp
index e40bf71026472..cd25426f23b29 100644
--- a/llvm/tools/llvm-split/llvm-split.cpp
+++ b/llvm/tools/llvm-split/llvm-split.cpp
@@ -32,8 +32,8 @@
#include "llvm/Transforms/Utils/SYCLUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
-#include <vector>
#include <string>
+#include <vector>
using namespace llvm;
@@ -100,7 +100,7 @@ void writeStringToFile(std::string_view Content, StringRef Path) {
}
void writeSplitModulesAsTable(ArrayRef<SYCLSplitModule> SplitModules,
- StringRef Path) {
+ StringRef Path) {
std::vector<std::string> Columns = {"Code", "Symbols"};
SYCLStringTable Table;
Table.emplace_back(std::move(Columns));
>From c054d0f5f4734c39ff659f36818099dc1dc2b195 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Tue, 28 Jan 2025 07:14:30 -0800
Subject: [PATCH 03/14] rename splitSYCLModule to SYCLSplitModule
---
.../{SYCLModuleSplit.h => SYCLSplitModule.h} | 30 +++++++++----------
llvm/lib/Transforms/Utils/CMakeLists.txt | 2 +-
...YCLModuleSplit.cpp => SYCLSplitModule.cpp} | 22 +++++++-------
llvm/tools/llvm-split/llvm-split.cpp | 14 ++++-----
4 files changed, 34 insertions(+), 34 deletions(-)
rename llvm/include/llvm/Transforms/Utils/{SYCLModuleSplit.h => SYCLSplitModule.h} (68%)
rename llvm/lib/Transforms/Utils/{SYCLModuleSplit.cpp => SYCLSplitModule.cpp} (96%)
diff --git a/llvm/include/llvm/Transforms/Utils/SYCLModuleSplit.h b/llvm/include/llvm/Transforms/Utils/SYCLSplitModule.h
similarity index 68%
rename from llvm/include/llvm/Transforms/Utils/SYCLModuleSplit.h
rename to llvm/include/llvm/Transforms/Utils/SYCLSplitModule.h
index 4df3e0321e9cd..eb57cc6d5e6e0 100644
--- a/llvm/include/llvm/Transforms/Utils/SYCLModuleSplit.h
+++ b/llvm/include/llvm/Transforms/Utils/SYCLSplitModule.h
@@ -1,4 +1,4 @@
-//===-------- SYCLModuleSplit.h - module split ------------------*- C++ -*-===//
+//===-------- SYCLSplitModule.h - module split ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,8 +10,8 @@
// of the split is new modules containing corresponding callgraph.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYCL_MODULE_SPLIT_H
-#define LLVM_SYCL_MODULE_SPLIT_H
+#ifndef LLVM_SYCL_SPLIT_MODULE_H
+#define LLVM_SYCL_SPLIT_MODULE_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -38,17 +38,17 @@ std::optional<IRSplitMode> convertStringToSplitMode(StringRef S);
/// The structure represents a split LLVM Module accompanied by additional
/// information. Split Modules are being stored at disk due to the high RAM
/// consumption during the whole splitting process.
-struct SYCLSplitModule {
+struct ModuleAndSYCLMetadata {
std::string ModuleFilePath;
std::string Symbols;
- SYCLSplitModule() = default;
- SYCLSplitModule(const SYCLSplitModule &) = default;
- SYCLSplitModule &operator=(const SYCLSplitModule &) = default;
- SYCLSplitModule(SYCLSplitModule &&) = default;
- SYCLSplitModule &operator=(SYCLSplitModule &&) = default;
+ ModuleAndSYCLMetadata() = default;
+ ModuleAndSYCLMetadata(const ModuleAndSYCLMetadata &) = default;
+ ModuleAndSYCLMetadata &operator=(const ModuleAndSYCLMetadata &) = default;
+ ModuleAndSYCLMetadata(ModuleAndSYCLMetadata &&) = default;
+ ModuleAndSYCLMetadata &operator=(ModuleAndSYCLMetadata &&) = default;
- SYCLSplitModule(std::string_view File, std::string Symbols)
+ ModuleAndSYCLMetadata(std::string_view File, std::string Symbols)
: ModuleFilePath(File), Symbols(std::move(Symbols)) {}
};
@@ -59,13 +59,13 @@ struct ModuleSplitterSettings {
};
/// Parses the string table.
-Expected<SmallVector<SYCLSplitModule, 0>>
-parseSYCLSplitModulesFromFile(StringRef File);
+Expected<SmallVector<ModuleAndSYCLMetadata, 0>>
+parseModuleAndSYCLMetadataFromFile(StringRef File);
/// Splits the given module \p M according to the given \p Settings.
-Expected<SmallVector<SYCLSplitModule, 0>>
-splitSYCLModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings);
+Expected<SmallVector<ModuleAndSYCLMetadata, 0>>
+SYCLSplitModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings);
} // namespace llvm
-#endif // LLVM_SYCL_MODULE_SPLIT_H
+#endif // LLVM_SYCL_SPLIT_MODULE_H
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 01b5ba2d818c7..0ba46bdadea8d 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -83,7 +83,7 @@ add_llvm_component_library(LLVMTransformUtils
SizeOpts.cpp
SplitModule.cpp
StripNonLineTableDebugInfo.cpp
- SYCLModuleSplit.cpp
+ SYCLSplitModule.cpp
SYCLUtils.cpp
SymbolRewriter.cpp
UnifyFunctionExitNodes.cpp
diff --git a/llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp b/llvm/lib/Transforms/Utils/SYCLSplitModule.cpp
similarity index 96%
rename from llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp
rename to llvm/lib/Transforms/Utils/SYCLSplitModule.cpp
index 157d034858f5f..0b1c116d738bc 100644
--- a/llvm/lib/Transforms/Utils/SYCLModuleSplit.cpp
+++ b/llvm/lib/Transforms/Utils/SYCLSplitModule.cpp
@@ -1,4 +1,4 @@
-//===-------- SYCLModuleSplitter.cpp - split a module into callgraphs -----===//
+//===-------- SYCLSplitModule.cpp - split a module into callgraphs --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
// See comments in the header.
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/SYCLModuleSplit.h"
+#include "llvm/Transforms/Utils/SYCLSplitModule.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
@@ -38,7 +38,7 @@
using namespace llvm;
-#define DEBUG_TYPE "sycl-module-split"
+#define DEBUG_TYPE "sycl-split-module"
static bool isKernel(const Function &F) {
return F.getCallingConv() == CallingConv::SPIR_KERNEL ||
@@ -410,13 +410,13 @@ static Error saveModuleIRInFile(Module &M, StringRef FilePath,
return Error::success();
}
-static Expected<SYCLSplitModule>
+static Expected<ModuleAndSYCLMetadata>
saveModuleDesc(ModuleDesc &MD, std::string Prefix, bool OutputAssembly) {
Prefix += OutputAssembly ? ".ll" : ".bc";
if (Error E = saveModuleIRInFile(MD.getModule(), Prefix, OutputAssembly))
return E;
- SYCLSplitModule SM;
+ ModuleAndSYCLMetadata SM;
SM.ModuleFilePath = Prefix;
SM.Symbols = MD.makeSymbolTable();
return SM;
@@ -424,8 +424,8 @@ saveModuleDesc(ModuleDesc &MD, std::string Prefix, bool OutputAssembly) {
namespace llvm {
-Expected<SmallVector<SYCLSplitModule, 0>>
-parseSYCLSplitModulesFromFile(StringRef File) {
+Expected<SmallVector<ModuleAndSYCLMetadata, 0>>
+parseModuleAndSYCLMetadataFromFile(StringRef File) {
auto EntriesMBOrErr = llvm::MemoryBuffer::getFile(File);
if (!EntriesMBOrErr)
return createFileError(File, EntriesMBOrErr.getError());
@@ -438,7 +438,7 @@ parseSYCLSplitModulesFromFile(StringRef File) {
// "Code" and "Symbols" at the moment.
static constexpr int NUMBER_COLUMNS = 2;
++LI;
- SmallVector<SYCLSplitModule, 0> Modules;
+ SmallVector<ModuleAndSYCLMetadata, 0> Modules;
while (!LI.is_at_eof()) {
StringRef Line = *LI;
if (Line.empty())
@@ -480,9 +480,9 @@ std::optional<IRSplitMode> convertStringToSplitMode(StringRef S) {
return It->second;
}
-Expected<SmallVector<SYCLSplitModule, 0>>
-splitSYCLModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings) {
- SmallVector<SYCLSplitModule, 0> OutputImages;
+Expected<SmallVector<ModuleAndSYCLMetadata, 0>>
+SYCLSplitModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings) {
+ SmallVector<ModuleAndSYCLMetadata, 0> OutputImages;
if (Settings.Mode == IRSplitMode::IRSM_NONE) {
ModuleDesc MD = std::move(M);
std::string OutIRFileName = (Settings.OutputPrefix + Twine("_0")).str();
diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp
index c1c9814920841..8c14879fca7a5 100644
--- a/llvm/tools/llvm-split/llvm-split.cpp
+++ b/llvm/tools/llvm-split/llvm-split.cpp
@@ -28,7 +28,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
-#include "llvm/Transforms/Utils/SYCLModuleSplit.h"
+#include "llvm/Transforms/Utils/SYCLSplitModule.h"
#include "llvm/Transforms/Utils/SYCLUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
@@ -99,12 +99,12 @@ void writeStringToFile(std::string_view Content, StringRef Path) {
OS << Content << "\n";
}
-void writeSplitModulesAsTable(ArrayRef<SYCLSplitModule> SplitModules,
+void writeSplitModulesAsTable(ArrayRef<ModuleAndSYCLMetadata> Modules,
StringRef Path) {
std::vector<std::string> Columns = {"Code", "Symbols"};
SYCLStringTable Table;
Table.emplace_back(std::move(Columns));
- for (const auto &[I, SM] : enumerate(SplitModules)) {
+ for (const auto &[I, SM] : enumerate(Modules)) {
std::string SymbolsFile = (Twine(Path) + "_" + Twine(I) + ".sym").str();
writeStringToFile(SM.Symbols, SymbolsFile);
std::vector<std::string> Row = {SM.ModuleFilePath, SymbolsFile};
@@ -126,11 +126,11 @@ Error runSYCLSplitModule(std::unique_ptr<Module> M) {
Settings.Mode = SYCLSplitMode;
Settings.OutputAssembly = OutputAssembly;
Settings.OutputPrefix = OutputFilename;
- auto SplitModulesOrErr = splitSYCLModule(std::move(M), Settings);
- if (!SplitModulesOrErr)
- return SplitModulesOrErr.takeError();
+ auto ModulesOrErr = SYCLSplitModule(std::move(M), Settings);
+ if (!ModulesOrErr)
+ return ModulesOrErr.takeError();
- writeSplitModulesAsTable(*SplitModulesOrErr, OutputFilename);
+ writeSplitModulesAsTable(*ModulesOrErr, OutputFilename);
return Error::success();
}
>From 122425cbfc2b6e3501d2adeaff1874d624787d68 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Fri, 31 Jan 2025 05:25:34 -0800
Subject: [PATCH 04/14] Resolve recent CR nits
---
llvm/lib/Transforms/Utils/SYCLSplitModule.cpp | 11 ++---------
llvm/tools/llvm-split/llvm-split.cpp | 12 +++++++-----
2 files changed, 9 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SYCLSplitModule.cpp b/llvm/lib/Transforms/Utils/SYCLSplitModule.cpp
index 0b1c116d738bc..fe926261623c3 100644
--- a/llvm/lib/Transforms/Utils/SYCLSplitModule.cpp
+++ b/llvm/lib/Transforms/Utils/SYCLSplitModule.cpp
@@ -106,7 +106,7 @@ class ModuleDesc {
ModuleDesc(std::unique_ptr<Module> M,
EntryPointGroup EntryPoints = EntryPointGroup())
: M(std::move(M)), EntryPoints(std::move(EntryPoints)) {
- assert(this->M && "Module should be non-empty");
+ assert(this->M && "Module should be non-null");
}
const EntryPointSet &entries() const { return EntryPoints.Functions; }
@@ -344,13 +344,6 @@ class ModuleSplitter {
} // namespace
-/// Gets attached attribute value if it is present. Otherwise returns empty
-/// stirng.
-static StringRef computeFunctionCategoryFromStringMetadata(const Function &F,
- StringRef AttrName) {
- return F.getFnAttribute(AttrName).getValueAsString();
-}
-
static EntryPointGroupVec selectEntryPointGroups(const Module &M,
IRSplitMode Mode) {
// std::map is used here to ensure stable ordering of entry point groups,
@@ -368,7 +361,7 @@ static EntryPointGroupVec selectEntryPointGroups(const Module &M,
Key = F.getName();
break;
case IRSplitMode::IRSM_PER_TU:
- Key = computeFunctionCategoryFromStringMetadata(F, ATTR_SYCL_MODULE_ID);
+ Key = F.getFnAttribute(ATTR_SYCL_MODULE_ID).getValueAsString();
break;
case IRSplitMode::IRSM_NONE:
llvm_unreachable("");
diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp
index 8c14879fca7a5..58f06e91ab93b 100644
--- a/llvm/tools/llvm-split/llvm-split.cpp
+++ b/llvm/tools/llvm-split/llvm-split.cpp
@@ -76,17 +76,19 @@ static cl::opt<std::string>
MCPU("mcpu", cl::desc("Target CPU, ignored if --mtriple is not used"),
cl::value_desc("cpu"), cl::cat(SplitCategory));
-cl::opt<IRSplitMode> SYCLSplitMode(
- "sycl-split", cl::desc("module split mode"), cl::Optional,
- cl::init(IRSplitMode::IRSM_NONE),
+static cl::opt<IRSplitMode> SYCLSplitMode(
+ "sycl-split",
+ cl::desc("SYCL Split Mode. If present, SYCL splitting algorithm is used "
+ "with the specified mode."),
+ cl::Optional, cl::init(IRSplitMode::IRSM_NONE),
cl::values(clEnumValN(IRSplitMode::IRSM_PER_TU, "source",
"1 ouptput module per translation unit"),
clEnumValN(IRSplitMode::IRSM_PER_KERNEL, "kernel",
"1 output module per kernel")),
cl::cat(SplitCategory));
-cl::opt<bool> OutputAssembly{"S", cl::desc("Write output as LLVM assembly"),
- cl::cat(SplitCategory)};
+static cl::opt<bool> OutputAssembly{
+ "S", cl::desc("Write output as LLVM assembly"), cl::cat(SplitCategory)};
void writeStringToFile(std::string_view Content, StringRef Path) {
std::error_code EC;
>From 33b8865caa6abff0a5d2a3e068c1f59035046424 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Mon, 3 Mar 2025 06:10:41 -0800
Subject: [PATCH 05/14] Address some CR issues
---
.../llvm/Transforms/Utils/SYCLSplitModule.h | 6 +++---
.../include/llvm/Transforms/Utils/SYCLUtils.h | 6 +++---
llvm/lib/Transforms/Utils/SYCLSplitModule.cpp | 2 +-
llvm/lib/Transforms/Utils/SYCLUtils.cpp | 5 ++---
llvm/tools/llvm-split/llvm-split.cpp | 20 ++++++++++++-------
5 files changed, 22 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/SYCLSplitModule.h b/llvm/include/llvm/Transforms/Utils/SYCLSplitModule.h
index eb57cc6d5e6e0..76235f63f2784 100644
--- a/llvm/include/llvm/Transforms/Utils/SYCLSplitModule.h
+++ b/llvm/include/llvm/Transforms/Utils/SYCLSplitModule.h
@@ -10,8 +10,8 @@
// of the split is new modules containing corresponding callgraph.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SYCL_SPLIT_MODULE_H
-#define LLVM_SYCL_SPLIT_MODULE_H
+#ifndef LLVM_TRANSFORMS_UTILS_SYCLSPLITMODULE_H
+#define LLVM_TRANSFORMS_UTILS_SYCLSPLITMODULE_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -68,4 +68,4 @@ SYCLSplitModule(std::unique_ptr<Module> M, ModuleSplitterSettings Settings);
} // namespace llvm
-#endif // LLVM_SYCL_SPLIT_MODULE_H
+#endif // LLVM_TRANSFORMS_UTILS_SYCLSPLITMODULE_H
diff --git a/llvm/include/llvm/Transforms/Utils/SYCLUtils.h b/llvm/include/llvm/Transforms/Utils/SYCLUtils.h
index 53dec1139cd8e..75459eed6ac0f 100644
--- a/llvm/include/llvm/Transforms/Utils/SYCLUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/SYCLUtils.h
@@ -10,14 +10,14 @@
#ifndef LLVM_TRANSFORMS_UTILS_SYCLUTILS_H
#define LLVM_TRANSFORMS_UTILS_SYCLUTILS_H
-#include <string>
-#include <vector>
+#include <llvm/ADT/SmallString.h>
+#include <llvm/ADT/SmallVector.h>
namespace llvm {
class raw_ostream;
-using SYCLStringTable = std::vector<std::vector<std::string>>;
+using SYCLStringTable = SmallVector<SmallVector<SmallString<64>>>;
void writeSYCLStringTable(const SYCLStringTable &Table, raw_ostream &OS);
diff --git a/llvm/lib/Transforms/Utils/SYCLSplitModule.cpp b/llvm/lib/Transforms/Utils/SYCLSplitModule.cpp
index fe926261623c3..a6842337d201e 100644
--- a/llvm/lib/Transforms/Utils/SYCLSplitModule.cpp
+++ b/llvm/lib/Transforms/Utils/SYCLSplitModule.cpp
@@ -1,4 +1,4 @@
-//===-------- SYCLSplitModule.cpp - split a module into callgraphs --------===//
+//===-------- SYCLSplitModule.cpp - Split a module into call graphs -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Transforms/Utils/SYCLUtils.cpp b/llvm/lib/Transforms/Utils/SYCLUtils.cpp
index 7ae94e044bd42..d099a66855d01 100644
--- a/llvm/lib/Transforms/Utils/SYCLUtils.cpp
+++ b/llvm/lib/Transforms/Utils/SYCLUtils.cpp
@@ -15,11 +15,10 @@ namespace llvm {
void writeSYCLStringTable(const SYCLStringTable &Table, raw_ostream &OS) {
assert(Table.size() > 0 && "table should contain at least column titles");
- size_t numberColumns = Table[0].size();
- assert(numberColumns > 0 && "table should be non-empty");
+ assert(Table[0].size() > 0 && "table should be non-empty");
OS << '[' << join(Table[0].begin(), Table[0].end(), "|") << "]\n";
for (size_t I = 1, E = Table.size(); I != E; ++I) {
- assert(Table[I].size() == numberColumns && "row's size should be equal");
+ assert(Table[I].size() == Table[0].size() && "row's size should be equal");
OS << join(Table[I].begin(), Table[I].end(), "|") << '\n';
}
}
diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp
index 58f06e91ab93b..bafc993c335c0 100644
--- a/llvm/tools/llvm-split/llvm-split.cpp
+++ b/llvm/tools/llvm-split/llvm-split.cpp
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/LLVMContext.h"
@@ -32,9 +34,6 @@
#include "llvm/Transforms/Utils/SYCLUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
-#include <string>
-#include <vector>
-
using namespace llvm;
static cl::OptionCategory SplitCategory("Split Options");
@@ -90,7 +89,7 @@ static cl::opt<IRSplitMode> SYCLSplitMode(
static cl::opt<bool> OutputAssembly{
"S", cl::desc("Write output as LLVM assembly"), cl::cat(SplitCategory)};
-void writeStringToFile(std::string_view Content, StringRef Path) {
+void writeStringToFile(StringRef Content, StringRef Path) {
std::error_code EC;
raw_fd_ostream OS(Path, EC);
if (EC) {
@@ -103,13 +102,19 @@ void writeStringToFile(std::string_view Content, StringRef Path) {
void writeSplitModulesAsTable(ArrayRef<ModuleAndSYCLMetadata> Modules,
StringRef Path) {
- std::vector<std::string> Columns = {"Code", "Symbols"};
+ SmallVector<SmallString<64>> Columns;
+ Columns.emplace_back("Code");
+ Columns.emplace_back("Symbols");
+
SYCLStringTable Table;
Table.emplace_back(std::move(Columns));
for (const auto &[I, SM] : enumerate(Modules)) {
- std::string SymbolsFile = (Twine(Path) + "_" + Twine(I) + ".sym").str();
+ SmallString<128> SymbolsFile;
+ (Twine(Path) + "_" + Twine(I) + ".sym").toVector(SymbolsFile);
writeStringToFile(SM.Symbols, SymbolsFile);
- std::vector<std::string> Row = {SM.ModuleFilePath, SymbolsFile};
+ SmallVector<SmallString<64>> Row;
+ Row.emplace_back(SM.ModuleFilePath);
+ Row.emplace_back(SymbolsFile);
Table.emplace_back(std::move(Row));
}
@@ -194,6 +199,7 @@ int main(int argc, char **argv) {
if (E) {
errs() << E << "\n";
Err.print(argv[0], errs());
+ return 1;
}
return 0;
>From 18843ffd770274dd4dbaa5cfca4bc72a858263d8 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Mon, 3 Mar 2025 08:31:18 -0800
Subject: [PATCH 06/14] Perform renaming and simplification of the test
---
.../device-code-split/auto-module-split-1.ll | 120 ------------------
.../SYCL/device-code-split/split-by-source.ll | 107 ++++++++++++++++
2 files changed, 107 insertions(+), 120 deletions(-)
delete mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-1.ll
create mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-1.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-1.ll
deleted file mode 100644
index 3734153b9fbaa..0000000000000
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-1.ll
+++ /dev/null
@@ -1,120 +0,0 @@
-; RUN: llvm-split -sycl-split=source -S < %s -o %t
-; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK
-; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK
-; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-TXT
-; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-TU1-TXT
-
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spir64-unknown-linux"
-
-$_Z3barIiET_S0_ = comdat any
-
-; CHECK-TU1-NOT: @{{.*}}GV{{.*}}
-; CHECK-TU0: @{{.*}}GV{{.*}} = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-
-; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
-; CHECK-TU1-TXT: {{.*}}TU0_kernel0{{.*}}
-; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
-; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel0{{.*}}
-
-; CHECK-TU1: call spir_func void @{{.*}}foo{{.*}}()
-
-define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel0() #0 {
-entry:
- call spir_func void @_Z3foov()
- ret void
-}
-
-; CHECK-TU1: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
-; CHECK-TU0-NOT: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
-
-; CHECK-TU1: call spir_func i32 @{{.*}}bar{{.*}}(i32 1)
-
-define dso_local spir_func void @_Z3foov() {
-entry:
- %a = alloca i32, align 4
- %call = call spir_func i32 @_Z3barIiET_S0_(i32 1)
- %add = add nsw i32 2, %call
- store i32 %add, ptr %a, align 4
- ret void
-}
-
-; CHECK-TU1: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
-; CHECK-TU0-NOT: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
-
-; Function Attrs: nounwind
-define linkonce_odr dso_local spir_func i32 @_Z3barIiET_S0_(i32 %arg) comdat {
-entry:
- %arg.addr = alloca i32, align 4
- store i32 %arg, ptr %arg.addr, align 4
- %0 = load i32, ptr %arg.addr, align 4
- ret i32 %0
-}
-
-; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
-; CHECK-TU1-TXT: {{.*}}TU0_kernel1{{.*}}
-; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
-; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel1{{.*}}
-
-; CHECK-TU1: call spir_func void @{{.*}}foo1{{.*}}()
-
-define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel1() #0 {
-entry:
- call spir_func void @_Z4foo1v()
- ret void
-}
-
-; CHECK-TU1: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
-; CHECK-TU0-NOT: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
-
-; Function Attrs: nounwind
-define dso_local spir_func void @_Z4foo1v() {
-entry:
- %a = alloca i32, align 4
- store i32 2, ptr %a, align 4
- ret void
-}
-
-; CHECK-TU1-NOT: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
-; CHECK-TU1-TXT-NOT: {{.*}}TU1_kernel{{.*}}
-; CHECK-TU0: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
-; CHECK-TU0-TXT: {{.*}}TU1_kernel{{.*}}
-
-; CHECK-TU0: call spir_func void @{{.*}}foo2{{.*}}()
-
-define dso_local spir_kernel void @_ZTSZ4mainE10TU1_kernel() #1 {
-entry:
- call spir_func void @_Z4foo2v()
- ret void
-}
-
-; CHECK-TU1-NOT: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
-; CHECK-TU0: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
-
-; Function Attrs: nounwind
-define dso_local spir_func void @_Z4foo2v() {
-entry:
- %a = alloca i32, align 4
-; CHECK-TU0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}}GV{{.*}} to ptr addrspace(4)), align 4
- %0 = load i32, ptr addrspace(4) getelementptr inbounds ([1 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZL2GV to ptr addrspace(4)), i64 0, i64 0), align 4
- %add = add nsw i32 4, %0
- store i32 %add, ptr %a, align 4
- ret void
-}
-
-attributes #0 = { "sycl-module-id"="TU1.cpp" }
-attributes #1 = { "sycl-module-id"="TU2.cpp" }
-
-; Metadata is saved in both modules.
-; CHECK: !opencl.spir.version = !{!0, !0}
-; CHECK: !spirv.Source = !{!1, !1}
-
-!opencl.spir.version = !{!0, !0}
-!spirv.Source = !{!1, !1}
-
-; CHECK: !0 = !{i32 1, i32 2}
-; CHECK: !1 = !{i32 4, i32 100000}
-
-!0 = !{i32 1, i32 2}
-!1 = !{i32 4, i32 100000}
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll
new file mode 100644
index 0000000000000..02ce422be91b3
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll
@@ -0,0 +1,107 @@
+; Test checks that kernels are being split by attached TU metadata and
+; used functions are being moved with kernels that use them.
+
+; RUN: llvm-split -sycl-split=source -S < %s -o %t
+; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK
+; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK
+; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-TXT
+; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-TU1-TXT
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64-unknown-linux"
+
+; CHECK-TU1-NOT: @GV
+; CHECK-TU0: @GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+@GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+
+; CHECK-TU1: define dso_local spir_kernel void @TU1_kernel0
+; CHECK-TU1-TXT: TU1_kernel0
+; CHECK-TU0-NOT: define dso_local spir_kernel void @TU1_kernel0
+; CHECK-TU0-TXT-NOT: TU1_kernel0
+
+; CHECK-TU1: call spir_func void @func1_TU1()
+
+define dso_local spir_kernel void @TU1_kernel0() #0 {
+entry:
+ call spir_func void @func1_TU1()
+ ret void
+}
+
+; CHECK-TU1: define {{.*}} spir_func void @func1_TU1()
+; CHECK-TU0-NOT: define {{.*}} spir_func void @func1_TU1()
+
+; CHECK-TU1: call spir_func void @func2_TU1()
+
+define dso_local spir_func void @func1_TU1() {
+entry:
+ call spir_func void @func2_TU1()
+ ret void
+}
+
+; CHECK-TU1: define {{.*}} spir_func void @func2_TU1()
+; CHECK-TU0-NOT: define {{.*}} spir_func void @func2_TU1()
+
+define linkonce_odr dso_local spir_func void @func2_TU1() {
+entry:
+ ret void
+}
+
+; CHECK-TU1: define dso_local spir_kernel void @TU1_kernel1()
+; CHECK-TU1-TXT: TU1_kernel1
+; CHECK-TU0-NOT: define dso_local spir_kernel void @TU1_kernel1()
+; CHECK-TU0-TXT-NOT: TU1_kernel1
+
+; CHECK-TU1: call spir_func void @func3_TU1()
+
+define dso_local spir_kernel void @TU1_kernel1() #0 {
+entry:
+ call spir_func void @func3_TU1()
+ ret void
+}
+
+; CHECK-TU1: define {{.*}} spir_func void @func3_TU1()
+; CHECK-TU0-NOT: define {{.*}} spir_func void @func3_TU1()
+
+define dso_local spir_func void @func3_TU1() {
+entry:
+ ret void
+}
+
+; CHECK-TU1-NOT: define dso_local spir_kernel void @TU0_kernel()
+; CHECK-TU1-TXT-NOT: TU0_kernel
+; CHECK-TU0: define dso_local spir_kernel void @TU0_kernel()
+; CHECK-TU0-TXT: TU0_kernel
+
+; CHECK-TU0: call spir_func void @func_TU0()
+
+define dso_local spir_kernel void @TU0_kernel() #1 {
+entry:
+ call spir_func void @func_TU0()
+ ret void
+}
+
+; CHECK-TU1-NOT: define {{.*}} spir_func void @func_TU0()
+; CHECK-TU0: define {{.*}} spir_func void @func_TU0()
+
+define dso_local spir_func void @func_TU0() {
+entry:
+; CHECK-TU0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @GV to ptr addrspace(4)), align 4
+ %0 = load i32, ptr addrspace(4) getelementptr inbounds ([1 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @GV to ptr addrspace(4)), i64 0, i64 0), align 4
+ ret void
+}
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
+
+; Metadata is saved in both modules.
+; CHECK: !opencl.spir.version = !{!0, !0}
+; CHECK: !spirv.Source = !{!1, !1}
+
+!opencl.spir.version = !{!0, !0}
+!spirv.Source = !{!1, !1}
+
+; CHECK: !0 = !{i32 1, i32 2}
+; CHECK: !1 = !{i32 4, i32 100000}
+
+!0 = !{i32 1, i32 2}
+!1 = !{i32 4, i32 100000}
>From e9101e647df35b17e33cf497f5029e927d30ed95 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Tue, 4 Mar 2025 07:16:35 -0800
Subject: [PATCH 07/14] remove per-reqd-* tests since corresponding
functionality was excluded from the first PR
---
.../per-reqd-sub-group-size-split-1.ll | 105 ------------------
.../per-reqd-wg-size-split-1.ll | 105 ------------------
2 files changed, 210 deletions(-)
delete mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-sub-group-size-split-1.ll
delete mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-wg-size-split-1.ll
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-sub-group-size-split-1.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-sub-group-size-split-1.ll
deleted file mode 100644
index 921b7c22fc365..0000000000000
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-sub-group-size-split-1.ll
+++ /dev/null
@@ -1,105 +0,0 @@
-; The test checks that Module splitting correctly separates kernels
-; that use reqd_sub_group_size attributes from kernels which doesn't use them
-; regardless of device code split mode
-
-; This test emulates two translation units with 3 kernels:
-; TU0_kernel0 - 1st translation unit, no reqd_sub_group_size attribute used
-; TU0_kernel1 - 1st translation unit, reqd_sub_group_size attribute is used
-; TU1_kernel2 - 2nd translation unit, no reqd_sub_group_size attribute used
-
-; RUN: llvm-split -sycl-split=kernel -S %s -o %t
-; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \
-; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
-; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \
-; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2
-; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \
-; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2
-; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \
-; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
-; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \
-; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2
-; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \
-; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2
-
-; Regardless of device code split mode, each kernel should go into a separate
-; device image
-
-; CHECK-M2-IR: define {{.*}} @TU0_kernel0
-; CHECK-M2-SYMS: TU0_kernel0
-
-; CHECK-M1-IR: define {{.*}} @TU0_kernel1
-; CHECK-M1-SYMS: TU0_kernel1
-
-; CHECK-M0-IR: define {{.*}} @TU1_kernel2
-; CHECK-M0-SYMS: TU1_kernel2
-
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spir64-unknown-linux"
-
-; FIXME: device globals should also be properly distributed across device images
-; if they are of optional type
-@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-
-define dso_local spir_kernel void @TU0_kernel0() #0 {
-entry:
- call spir_func void @foo()
- ret void
-}
-
-define dso_local spir_func void @foo() {
-entry:
- %a = alloca i32, align 4
- %call = call spir_func i32 @bar(i32 1)
- %add = add nsw i32 2, %call
- store i32 %add, i32* %a, align 4
- ret void
-}
-
-; Function Attrs: nounwind
-define linkonce_odr dso_local spir_func i32 @bar(i32 %arg) {
-entry:
- %arg.addr = alloca i32, align 4
- store i32 %arg, i32* %arg.addr, align 4
- %0 = load i32, i32* %arg.addr, align 4
- ret i32 %0
-}
-
-define dso_local spir_kernel void @TU0_kernel1() #0 !intel_reqd_sub_group_size !2 {
-entry:
- call spir_func void @foo1()
- ret void
-}
-
-; Function Attrs: nounwind
-define dso_local spir_func void @foo1() {
-entry:
- %a = alloca i32, align 4
- store i32 2, i32* %a, align 4
- ret void
-}
-
-define dso_local spir_kernel void @TU1_kernel2() #1 {
-entry:
- call spir_func void @foo2()
- ret void
-}
-
-; Function Attrs: nounwind
-define dso_local spir_func void @foo2() {
-entry:
- %a = alloca i32, align 4
- %0 = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @_ZL2GV to [1 x i32] addrspace(4)*), i64 0, i64 0), align 4
- %add = add nsw i32 4, %0
- store i32 %add, i32* %a, align 4
- ret void
-}
-
-attributes #0 = { "sycl-module-id"="TU1.cpp" }
-attributes #1 = { "sycl-module-id"="TU2.cpp" }
-
-!opencl.spir.version = !{!0, !0}
-!spirv.Source = !{!1, !1}
-
-!0 = !{i32 1, i32 2}
-!1 = !{i32 4, i32 100000}
-!2 = !{i32 32}
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-wg-size-split-1.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-wg-size-split-1.ll
deleted file mode 100644
index 2ca8b220edfbe..0000000000000
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/per-reqd-wg-size-split-1.ll
+++ /dev/null
@@ -1,105 +0,0 @@
-; The test checks that Module splitting correctly separates kernels
-; that use reqd_work_group_size attributes from kernels which doesn't use them
-; regardless of device code split mode
-
-; This test emulates two translation units with 3 kernels:
-; TU0_kernel0 - 1st translation unit, no reqd_work_group_size attribute used
-; TU0_kernel1 - 1st translation unit, reqd_work_group_size attribute is used
-; TU1_kernel2 - 2nd translation unit, no reqd_work_group_size attribute used
-
-; RUN: llvm-split -sycl-split=kernel -S < %s -o %t
-; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \
-; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
-; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \
-; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2
-; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \
-; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2
-; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \
-; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1
-; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \
-; RUN: --implicit-check-not kernel0 --implicit-check-not kernel2
-; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \
-; RUN: --implicit-check-not kernel1 --implicit-check-not kernel2
-
-; Regardless of device code split mode, each kernel should go into a separate
-; device image
-
-; CHECK-M2-IR: define {{.*}} @TU0_kernel0
-; CHECK-M2-SYMS: TU0_kernel0
-
-; CHECK-M1-IR: define {{.*}} @TU0_kernel1
-; CHECK-M1-SYMS: TU0_kernel1
-
-; CHECK-M0-IR: define {{.*}} @TU1_kernel2
-; CHECK-M0-SYMS: TU1_kernel2
-
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spir64-unknown-linux"
-
-; FIXME: device globals should also be properly distributed across device images
-; if they are of optional type
-@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-
-define dso_local spir_kernel void @TU0_kernel0() #0 {
-entry:
- call spir_func void @foo()
- ret void
-}
-
-define dso_local spir_func void @foo() {
-entry:
- %a = alloca i32, align 4
- %call = call spir_func i32 @bar(i32 1)
- %add = add nsw i32 2, %call
- store i32 %add, i32* %a, align 4
- ret void
-}
-
-; Function Attrs: nounwind
-define linkonce_odr dso_local spir_func i32 @bar(i32 %arg) {
-entry:
- %arg.addr = alloca i32, align 4
- store i32 %arg, i32* %arg.addr, align 4
- %0 = load i32, i32* %arg.addr, align 4
- ret i32 %0
-}
-
-define dso_local spir_kernel void @TU0_kernel1() #0 !reqd_work_group_size !2 {
-entry:
- call spir_func void @foo1()
- ret void
-}
-
-; Function Attrs: nounwind
-define dso_local spir_func void @foo1() {
-entry:
- %a = alloca i32, align 4
- store i32 2, i32* %a, align 4
- ret void
-}
-
-define dso_local spir_kernel void @TU1_kernel2() #1 {
-entry:
- call spir_func void @foo2()
- ret void
-}
-
-; Function Attrs: nounwind
-define dso_local spir_func void @foo2() {
-entry:
- %a = alloca i32, align 4
- %0 = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @_ZL2GV to [1 x i32] addrspace(4)*), i64 0, i64 0), align 4
- %add = add nsw i32 4, %0
- store i32 %add, i32* %a, align 4
- ret void
-}
-
-attributes #0 = { "sycl-module-id"="TU1.cpp" }
-attributes #1 = { "sycl-module-id"="TU2.cpp" }
-
-!opencl.spir.version = !{!0, !0}
-!spirv.Source = !{!1, !1}
-
-!0 = !{i32 1, i32 2}
-!1 = !{i32 4, i32 100000}
-!2 = !{i32 32}
>From 7e84a25991b3d5c72c891f9d7919675d1585ca17 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Tue, 4 Mar 2025 07:46:40 -0800
Subject: [PATCH 08/14] Rework split-with-kernel-declarations.ll.
Add source split mode checks.
Remove unnecessary metadata.
---
.../split-with-kernel-declarations.ll | 57 ++++++++++++-------
1 file changed, 35 insertions(+), 22 deletions(-)
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/split-with-kernel-declarations.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/split-with-kernel-declarations.ll
index 4ba15ecdefea6..1f188d8e32db6 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/split-with-kernel-declarations.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/split-with-kernel-declarations.ll
@@ -1,10 +1,30 @@
; The test checks that Module splitting does not treat declarations as entry points.
+; RUN: llvm-split -sycl-split=source -S < %s -o %t1
+; RUN: FileCheck %s -input-file=%t1.table --check-prefix CHECK-PER-SOURCE-TABLE
+; RUN: FileCheck %s -input-file=%t1_0.sym --check-prefix CHECK-PER-SOURCE-SYM0
+; RUN: FileCheck %s -input-file=%t1_1.sym --check-prefix CHECK-PER-SOURCE-SYM1
+
; RUN: llvm-split -sycl-split=kernel -S < %s -o %t2
; RUN: FileCheck %s -input-file=%t2.table --check-prefix CHECK-PER-KERNEL-TABLE
-; RUN: FileCheck %s -input-file=%t2_0.sym --check-prefix CHECK-PER-KERNEL-SYM1
-; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefix CHECK-PER-KERNEL-SYM2
-; RUN: FileCheck %s -input-file=%t2_2.sym --check-prefix CHECK-PER-KERNEL-SYM0
+; RUN: FileCheck %s -input-file=%t2_0.sym --check-prefix CHECK-PER-KERNEL-SYM0
+; RUN: FileCheck %s -input-file=%t2_1.sym --check-prefix CHECK-PER-KERNEL-SYM1
+; RUN: FileCheck %s -input-file=%t2_2.sym --check-prefix CHECK-PER-KERNEL-SYM2
+
+; With per-source split, there should be two device images
+; CHECK-PER-SOURCE-TABLE: [Code|Symbols]
+; CHECK-PER-SOURCE-TABLE: {{.*}}_0.ll|{{.*}}_0.sym
+; CHECK-PER-SOURCE-TABLE-NEXT: {{.*}}_1.ll|{{.*}}_1.sym
+; CHECK-PER-SOURCE-TABLE-EMPTY:
+;
+; CHECK-PER-SOURCE-SYM0-NOT: TU1_kernel1
+; CHECK-PER-SOURCE-SYM0: TU1_kernel0
+; CHECK-PER-SOURCE-SYM0-EMPTY:
+;
+; CHECK-PER-SOURCE-SYM1-NOT: TU1_kernel1
+; CHECK-PER-SOURCE-SYM1: TU0_kernel0
+; CHECK-PER-SOURCE-SYM1-NEXT: TU0_kernel1
+; CHECK-PER-SOURCE-SYM1-EMPTY:
; With per-kernel split, there should be three device images
; CHECK-PER-KERNEL-TABLE: [Code|Symbols]
@@ -13,41 +33,34 @@
; CHECK-PER-KERNEL-TABLE-NEXT: {{.*}}_2.ll|{{.*}}_2.sym
; CHECK-PER-KERNEL-TABLE-EMPTY:
;
-; CHECK-PER-KERNEL-SYM0-NOT: _ZTS4mainE10TU1_kernel1
-; CHECK-PER-KERNEL-SYM0: _ZTSZ4mainE10TU1_kernel0
+; CHECK-PER-KERNEL-SYM0-NOT: TU1_kernel1
+; CHECK-PER-KERNEL-SYM0: TU1_kernel0
; CHECK-PER-KERNEL-SYM0-EMPTY:
;
-; CHECK-PER-KERNEL-SYM2-NOT: _ZTS4mainE10TU1_kernel1
-; CHECK-PER-KERNEL-SYM2: _ZTSZ4mainE11TU0_kernel0
-; CHECK-PER-KERNEL-SYM2-EMPTY:
-;
-; CHECK-PER-KERNEL-SYM1-NOT: _ZTS4mainE10TU1_kernel1
-; CHECK-PER-KERNEL-SYM1: _ZTSZ4mainE11TU0_kernel1
+; CHECK-PER-KERNEL-SYM1-NOT: TU1_kernel1
+; CHECK-PER-KERNEL-SYM1: TU0_kernel1
; CHECK-PER-KERNEL-SYM1-EMPTY:
+;
+; CHECK-PER-KERNEL-SYM2-NOT: TU1_kernel1
+; CHECK-PER-KERNEL-SYM2: TU0_kernel0
+; CHECK-PER-KERNEL-SYM2-EMPTY:
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spir64-unknown-linux"
-define spir_kernel void @_ZTSZ4mainE11TU0_kernel0() #0 {
+define spir_kernel void @TU0_kernel0() #0 {
entry:
ret void
}
-define spir_kernel void @_ZTSZ4mainE11TU0_kernel1() #0 {
+define spir_kernel void @TU0_kernel1() #0 {
entry:
ret void
}
-define spir_kernel void @_ZTSZ4mainE10TU1_kernel0() #1 {
+define spir_kernel void @TU1_kernel0() #1 {
ret void
}
-declare spir_kernel void @_ZTS4mainE10TU1_kernel1() #1
+declare spir_kernel void @TU1_kernel1() #1
attributes #0 = { "sycl-module-id"="TU1.cpp" }
attributes #1 = { "sycl-module-id"="TU2.cpp" }
-
-!opencl.spir.version = !{!0, !0}
-!spirv.Source = !{!1, !1}
-!0 = !{i32 1, i32 2}
-!1 = !{i32 4, i32 100000}
>From cf807da8f911fe09b4ef48a8be1c3f1e007bac1d Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Tue, 4 Mar 2025 08:01:19 -0800
Subject: [PATCH 09/14] remove basic-module-split.ll since it is a copy of
split-by-source.ll
---
.../device-code-split/basic-module-split.ll | 122 ------------------
1 file changed, 122 deletions(-)
delete mode 100644 llvm/test/tools/llvm-split/SYCL/device-code-split/basic-module-split.ll
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/basic-module-split.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/basic-module-split.ll
deleted file mode 100644
index a916fdfa82b76..0000000000000
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/basic-module-split.ll
+++ /dev/null
@@ -1,122 +0,0 @@
-; RUN: llvm-split -sycl-split=source -S < %s -o %t
-; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK
-; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK
-; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-TXT
-; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-TU1-TXT
-
-; ModuleID = 'basic-module-split.ll'
-source_filename = "basic-module-split.ll"
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spir64-unknown-linux"
-
-$_Z3barIiET_S0_ = comdat any
-
-;CHECK-TU1-NOT: @{{.*}}GV{{.*}}
-;CHECK-TU0: @{{.*}}GV{{.*}} = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-
-; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
-; CHECK-TU1-TXT: {{.*}}TU0_kernel0{{.*}}
-; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
-; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel0{{.*}}
-
-; CHECK-TU1: call spir_func void @{{.*}}foo{{.*}}()
-
-define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel0() #0 {
-entry:
- call spir_func void @_Z3foov()
- ret void
-}
-
-; CHECK-TU1: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
-; CHECK-TU0-NOT: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
-
-; CHECK-TU1: call spir_func i32 @{{.*}}bar{{.*}}(i32 1)
-
-define dso_local spir_func void @_Z3foov() {
-entry:
- %a = alloca i32, align 4
- %call = call spir_func i32 @_Z3barIiET_S0_(i32 1)
- %add = add nsw i32 2, %call
- store i32 %add, ptr %a, align 4
- ret void
-}
-
-; CHECK-TU1: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
-; CHECK-TU0-NOT: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
-
-; Function Attrs: nounwind
-define linkonce_odr dso_local spir_func i32 @_Z3barIiET_S0_(i32 %arg) comdat {
-entry:
- %arg.addr = alloca i32, align 4
- store i32 %arg, ptr %arg.addr, align 4
- %0 = load i32, ptr %arg.addr, align 4
- ret i32 %0
-}
-
-; CHECK-TU1: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
-; CHECK-TU1-TXT: {{.*}}TU0_kernel1{{.*}}
-; CHECK-TU0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
-; CHECK-TU0-TXT-NOT: {{.*}}TU0_kernel1{{.*}}
-
-; CHECK-TU1: call spir_func void @{{.*}}foo1{{.*}}()
-
-define dso_local spir_kernel void @_ZTSZ4mainE11TU0_kernel1() #0 {
-entry:
- call spir_func void @_Z4foo1v()
- ret void
-}
-
-; CHECK-TU1: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
-; CHECK-TU0-NOT: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
-
-; Function Attrs: nounwind
-define dso_local spir_func void @_Z4foo1v() {
-entry:
- %a = alloca i32, align 4
- store i32 2, ptr %a, align 4
- ret void
-}
-
-; CHECK-TU1-NOT: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
-; CHECK-TU1-TXT-NOT: {{.*}}TU1_kernel{{.*}}
-; CHECK-TU0: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
-; CHECK-TU0-TXT: {{.*}}TU1_kernel{{.*}}
-
-; CHECK-TU0: call spir_func void @{{.*}}foo2{{.*}}()
-
-define dso_local spir_kernel void @_ZTSZ4mainE10TU1_kernel() #1 {
-entry:
- call spir_func void @_Z4foo2v()
- ret void
-}
-
-; CHECK-TU1-NOT: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
-; CHECK-TU0: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
-
-; Function Attrs: nounwind
-define dso_local spir_func void @_Z4foo2v() {
-entry:
- %a = alloca i32, align 4
-; CHECK-TU0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}}GV{{.*}} to ptr addrspace(4)), align 4
- %0 = load i32, ptr addrspace(4) getelementptr inbounds ([1 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZL2GV to ptr addrspace(4)), i64 0, i64 0), align 4
- %add = add nsw i32 4, %0
- store i32 %add, ptr %a, align 4
- ret void
-}
-
-attributes #0 = { "sycl-module-id"="TU1.cpp" }
-attributes #1 = { "sycl-module-id"="TU2.cpp" }
-
-; Metadata is saved in both modules.
-; CHECK: !opencl.spir.version = !{!0, !0}
-; CHECK: !spirv.Source = !{!1, !1}
-
-!opencl.spir.version = !{!0, !0}
-!spirv.Source = !{!1, !1}
-
-; CHECK; !0 = !{i32 1, i32 2}
-; CHECK; !1 = !{i32 4, i32 100000}
-
-!0 = !{i32 1, i32 2}
-!1 = !{i32 4, i32 100000}
>From 92ee37bcff5d6f17e045df417153f372958c6758 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Tue, 4 Mar 2025 08:13:08 -0800
Subject: [PATCH 10/14] Simplify and remove trash
---
.../auto-module-split-func-ptr.ll | 29 ++---
.../complex-indirect-call-chain.ll | 3 -
.../one-kernel-per-module.ll | 115 +++++++-----------
.../SYCL/device-code-split/split-by-source.ll | 3 -
4 files changed, 58 insertions(+), 92 deletions(-)
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-func-ptr.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-func-ptr.ll
index 2e3d2e5e55c9b..6de99d21b8751 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-func-ptr.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-func-ptr.ll
@@ -1,5 +1,5 @@
-; This test checks that we can properly perform device code split by tracking
-; all uses of functions (not only direct calls)
+; This test checks that Module splitting can properly perform device code split by tracking
+; all uses of functions (not only direct calls).
; RUN: llvm-split -sycl-split=source -S < %s -o %t
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix=CHECK-SYM0
@@ -12,39 +12,32 @@
;
; CHECK-IR0: define dso_local spir_kernel void @kernel2
;
-; CHECK-IR1: @_Z2f1iTable = weak global ptr @_Z2f1i
-; CHECK-IR1: define {{.*}} i32 @_Z2f1i
+; CHECK-IR1: @FuncTable = weak global ptr @func
+; CHECK-IR1: define {{.*}} i32 @func
; CHECK-IR1: define weak_odr dso_local spir_kernel void @kernel1
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
-target triple = "spirv64-unknown-unknown"
+@FuncTable = weak global ptr @func, align 8
-@_Z2f1iTable = weak global ptr @_Z2f1i, align 8
-
-; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define dso_local spir_func i32 @_Z2f1i(i32 %a) #0 {
+define dso_local spir_func i32 @func(i32 %a) {
entry:
ret i32 %a
}
-; Function Attrs: convergent norecurse
-define weak_odr dso_local spir_kernel void @kernel1() #1 {
+define weak_odr dso_local spir_kernel void @kernel1() #0 {
entry:
- %0 = call i32 @indirect_call(ptr addrspace(4) addrspacecast ( ptr getelementptr inbounds ( [1 x ptr] , ptr @_Z2f1iTable, i64 0, i64 0) to ptr addrspace(4)), i32 0)
+ %0 = call i32 @indirect_call(ptr addrspace(4) addrspacecast ( ptr getelementptr inbounds ( [1 x ptr] , ptr @FuncTable, i64 0, i64 0) to ptr addrspace(4)), i32 0)
ret void
}
-; Function Attrs: convergent norecurse
-define dso_local spir_kernel void @kernel2() #2 {
+define dso_local spir_kernel void @kernel2() #1 {
entry:
ret void
}
declare dso_local spir_func i32 @indirect_call(ptr addrspace(4), i32) local_unnamed_addr
-attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn }
-attributes #1 = { convergent norecurse "sycl-module-id"="TU1.cpp" }
-attributes #2 = { convergent norecurse "sycl-module-id"="TU2.cpp" }
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
; CHECK: kernel1
; CHECK: kernel2
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
index 1e92034c156bf..dcf600f06a57a 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
@@ -26,9 +26,6 @@
; CHECK2-DAG: define spir_kernel void @kernel_A
; CHECK2-DAG: define {{.*}}spir_func void @baz
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
-target triple = "spir64-unknown-unknown"
-
define spir_func i32 @foo(i32 (i32, void ()*)* %ptr1, void ()* %ptr2) {
%1 = call spir_func i32 %ptr1(i32 42, void ()* %ptr2)
ret i32 %1
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll
index ddb0ea0b3c59a..b8d5b2f2e53be 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll
@@ -8,113 +8,92 @@
; RUN: FileCheck %s -input-file=%t.files_2.ll --check-prefixes CHECK-MODULE2,CHECK
; RUN: FileCheck %s -input-file=%t.files_2.sym --check-prefixes CHECK-MODULE2-TXT
-; ModuleID = 'one-kernel-per-module.ll'
-source_filename = "one-kernel-per-module.ll"
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spirv64-unknown-unknown"
+;CHECK-MODULE2-NOT: @GV
+;CHECK-MODULE1-NOT: @GV
+;CHECK-MODULE0: @GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+@GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-$_Z3barIiET_S0_ = comdat any
+; CHECK-MODULE2: define dso_local spir_kernel void @TU0_kernel0
+; CHECK-MODULE2-TXT: TU0_kernel0
+; CHECK-MODULE1-NOT: define dso_local spir_kernel void @TU0_kernel0
+; CHECK-MODULE1-TXT-NOT: TU0_kernel0
-;CHECK-MODULE2-NOT: @{{.*}}GV{{.*}}
-;CHECK-MODULE1-NOT: @{{.*}}GV{{.*}}
-;CHECK-MODULE0: @{{.*}}GV{{.*}} = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-
-; CHECK-MODULE2: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
-; CHECK-MODULE2-TXT: {{.*}}TU0_kernel0{{.*}}
-; CHECK-MODULE1-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel0{{.*}}
-; CHECK-MODULE1-TXT-NOT: {{.*}}TU0_kernel0{{.*}}
-
-; CHECK-MODULE2: call spir_func void @{{.*}}foo{{.*}}()
+; CHECK-MODULE2: call spir_func void @foo()
define dso_local spir_kernel void @TU0_kernel0() #0 {
entry:
- call spir_func void @_Z3foov()
+ call spir_func void @foo()
ret void
}
-; CHECK-MODULE2: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
-; CHECK-MODULE1-NOT: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
-; CHECK-MODULE0-NOT: define {{.*}} spir_func void @{{.*}}foo{{.*}}()
+; CHECK-MODULE2: define {{.*}} spir_func void @foo()
+; CHECK-MODULE1-NOT: define {{.*}} spir_func void @foo()
+; CHECK-MODULE0-NOT: define {{.*}} spir_func void @foo()
-; CHECK-MODULE2: call spir_func i32 @{{.*}}bar{{.*}}(i32 1)
+; CHECK-MODULE2: call spir_func void @bar()
-define dso_local spir_func void @_Z3foov() {
+define dso_local spir_func void @foo() {
entry:
- %a = alloca i32, align 4
- %call = call spir_func i32 @_Z3barIiET_S0_(i32 1)
- %add = add nsw i32 2, %call
- store i32 %add, ptr %a, align 4
+ call spir_func void @bar()
ret void
}
-; CHECK-MODULE2: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
-; CHECK-MODULE1-NOT: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
-; CHECK-MODULE0-NOT: define {{.*}} spir_func i32 @{{.*}}bar{{.*}}(i32 %arg)
+; CHECK-MODULE2: define {{.*}} spir_func void @bar()
+; CHECK-MODULE1-NOT: define {{.*}} spir_func void @bar()
+; CHECK-MODULE0-NOT: define {{.*}} spir_func void @bar()
-; Function Attrs: nounwind
-define linkonce_odr dso_local spir_func i32 @_Z3barIiET_S0_(i32 %arg) comdat {
+define linkonce_odr dso_local spir_func void @bar() {
entry:
- %arg.addr = alloca i32, align 4
- store i32 %arg, ptr %arg.addr, align 4
- %0 = load i32, ptr %arg.addr, align 4
- ret i32 %0
+ ret void
}
-; CHECK-MODULE2-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
-; CHECK-MODULE2-TXT-NOT: {{.*}}TU0_kernel1{{.*}}
-; CHECK-MODULE1: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
-; CHECK-MODULE1-TXT: {{.*}}TU0_kernel1{{.*}}
-; CHECK-MODULE0-NOT: define dso_local spir_kernel void @{{.*}}TU0_kernel1{{.*}}()
-; CHECK-MODULE0-TXT-NOT: {{.*}}TU0_kernel1{{.*}}
+; CHECK-MODULE2-NOT: define dso_local spir_kernel void @TU0_kernel1()
+; CHECK-MODULE2-TXT-NOT: TU0_kernel1
+; CHECK-MODULE1: define dso_local spir_kernel void @TU0_kernel1()
+; CHECK-MODULE1-TXT: TU0_kernel1
+; CHECK-MODULE0-NOT: define dso_local spir_kernel void @TU0_kernel1()
+; CHECK-MODULE0-TXT-NOT: TU0_kernel1
-; CHECK-MODULE1: call spir_func void @{{.*}}foo1{{.*}}()
+; CHECK-MODULE1: call spir_func void @foo1()
define dso_local spir_kernel void @TU0_kernel1() #0 {
entry:
- call spir_func void @_Z4foo1v()
+ call spir_func void @foo1()
ret void
}
-; CHECK-MODULE2-NOT: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
-; CHECK-MODULE1: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
-; CHECK-MODULE0-NOT: define {{.*}} spir_func void @{{.*}}foo1{{.*}}()
+; CHECK-MODULE2-NOT: define {{.*}} spir_func void @foo1()
+; CHECK-MODULE1: define {{.*}} spir_func void @foo1()
+; CHECK-MODULE0-NOT: define {{.*}} spir_func void @foo1()
-; Function Attrs: nounwind
-define dso_local spir_func void @_Z4foo1v() {
+define dso_local spir_func void @foo1() {
entry:
- %a = alloca i32, align 4
- store i32 2, ptr %a, align 4
ret void
}
-; CHECK-MODULE2-NOT: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
-; CHECK-MODULE2-TXT-NOT: {{.*}}TU1_kernel{{.*}}
-; CHECK-MODULE1-NOT: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
-; CHECK-MODULE1-TXT-NOT: {{.*}}TU1_kernel{{.*}}
-; CHECK-MODULE0: define dso_local spir_kernel void @{{.*}}TU1_kernel{{.*}}()
-; CHECK-MODULE0-TXT: {{.*}}TU1_kernel{{.*}}
+; CHECK-MODULE2-NOT: define dso_local spir_kernel void @TU1_kernel()
+; CHECK-MODULE2-TXT-NOT: TU1_kernel
+; CHECK-MODULE1-NOT: define dso_local spir_kernel void @TU1_kernel()
+; CHECK-MODULE1-TXT-NOT: TU1_kernel
+; CHECK-MODULE0: define dso_local spir_kernel void @TU1_kernel()
+; CHECK-MODULE0-TXT: TU1_kernel
-; CHECK-MODULE0: call spir_func void @{{.*}}foo2{{.*}}()
+; CHECK-MODULE0: call spir_func void @foo2()
define dso_local spir_kernel void @TU1_kernel() #1 {
entry:
- call spir_func void @_Z4foo2v()
+ call spir_func void @foo2()
ret void
}
-; CHECK-MODULE2-NOT: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
-; CHECK-MODULE1-NOT: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
-; CHECK-MODULE0: define {{.*}} spir_func void @{{.*}}foo2{{.*}}()
+; CHECK-MODULE2-NOT: define {{.*}} spir_func void @foo2()
+; CHECK-MODULE1-NOT: define {{.*}} spir_func void @foo2()
+; CHECK-MODULE0: define {{.*}} spir_func void @foo2()
-; Function Attrs: nounwind
-define dso_local spir_func void @_Z4foo2v() {
+define dso_local spir_func void @foo2() {
entry:
- %a = alloca i32, align 4
-; CHECK-MODULE0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @{{.*}}GV{{.*}} to ptr addrspace(4)), align 4
- %0 = load i32, ptr addrspace(4) getelementptr inbounds ([1 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @_ZL2GV to ptr addrspace(4)), i64 0, i64 0), align 4
- %add = add nsw i32 4, %0
- store i32 %add, ptr %a, align 4
+; CHECK-MODULE0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @GV to ptr addrspace(4)), align 4
+ %0 = load i32, ptr addrspace(4) getelementptr inbounds ([1 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @GV to ptr addrspace(4)), i64 0, i64 0), align 4
ret void
}
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll
index 02ce422be91b3..cde7ed9dc37ef 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll
@@ -7,9 +7,6 @@
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-TU0-TXT
; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-TU1-TXT
-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spir64-unknown-linux"
-
; CHECK-TU1-NOT: @GV
; CHECK-TU0: @GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
@GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
>From 306e3f69a14f871ae15cd9f3da94782c81e683e7 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Tue, 4 Mar 2025 08:13:39 -0800
Subject: [PATCH 11/14] rename the test
---
.../{auto-module-split-func-ptr.ll => module-split-func-ptr.ll} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename llvm/test/tools/llvm-split/SYCL/device-code-split/{auto-module-split-func-ptr.ll => module-split-func-ptr.ll} (100%)
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-func-ptr.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/module-split-func-ptr.ll
similarity index 100%
rename from llvm/test/tools/llvm-split/SYCL/device-code-split/auto-module-split-func-ptr.ll
rename to llvm/test/tools/llvm-split/SYCL/device-code-split/module-split-func-ptr.ll
>From e22302315d27fcaa79d1c6cf8144cfd769caa56f Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Tue, 4 Mar 2025 09:02:13 -0800
Subject: [PATCH 12/14] add source mode to complex-indirect-call-chain.ll
---
.../device-code-split/complex-indirect-call-chain.ll | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
index dcf600f06a57a..e91d932a5774e 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
@@ -1,6 +1,18 @@
; Check that Module splitting can trace through more complex call stacks
; involving several nested indirect calls.
+;
+; RUN: llvm-split -sycl-split=source -S < %s -o %t
+; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \
+; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \
+; RUN: --implicit-check-not @kernel_B --implicit-check-not @baz
+; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix CHECK1 \
+; RUN: --implicit-check-not @kernel_A --implicit-check-not @kernel_C
+; RUN: FileCheck %s -input-file=%t_2.ll --check-prefix CHECK2 \
+; RUN: --implicit-check-not @foo --implicit-check-not @bar \
+; RUN: --implicit-check-not @BAZ --implicit-check-not @kernel_B \
+; RUN: --implicit-check-not @kernel_C
+
; RUN: llvm-split -sycl-split=kernel -S < %s -o %t
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \
; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \
>From 0a8cc6f1a24d006fdc458324c8784d985d776fe0 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Mon, 10 Mar 2025 08:34:22 -0700
Subject: [PATCH 13/14] rework tests
---
.../device-code-split/amd-kernel-split.ll | 12 ++--
.../complex-indirect-call-chain.ll | 1 -
.../module-split-func-ptr.ll | 6 +-
.../one-kernel-per-module.ll | 72 +++++++++----------
.../SYCL/device-code-split/split-by-source.ll | 49 ++++++-------
5 files changed, 63 insertions(+), 77 deletions(-)
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/amd-kernel-split.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/amd-kernel-split.ll
index 6b0305d12400f..a40a52107fb0c 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/amd-kernel-split.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/amd-kernel-split.ll
@@ -3,15 +3,15 @@
; RUN: FileCheck %s -input-file=%tC_0.ll --check-prefixes CHECK-A0
; RUN: FileCheck %s -input-file=%tC_1.ll --check-prefixes CHECK-A1
-define dso_local amdgpu_kernel void @Kernel1() {
+define dso_local amdgpu_kernel void @KernelA() {
ret void
}
-define dso_local amdgpu_kernel void @Kernel2() {
+define dso_local amdgpu_kernel void @KernelB() {
ret void
}
-; CHECK-A0: define dso_local amdgpu_kernel void @Kernel2()
-; CHECK-A0-NOT: define dso_local amdgpu_kernel void @Kernel1()
-; CHECK-A1-NOT: define dso_local amdgpu_kernel void @Kernel2()
-; CHECK-A1: define dso_local amdgpu_kernel void @Kernel1()
+; CHECK-A0: define dso_local amdgpu_kernel void @KernelB()
+; CHECK-A0-NOT: define dso_local amdgpu_kernel void @KernelA()
+; CHECK-A1-NOT: define dso_local amdgpu_kernel void @KernelB()
+; CHECK-A1: define dso_local amdgpu_kernel void @KernelA()
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
index e91d932a5774e..5a25e491b1b93 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/complex-indirect-call-chain.ll
@@ -1,7 +1,6 @@
; Check that Module splitting can trace through more complex call stacks
; involving several nested indirect calls.
-;
; RUN: llvm-split -sycl-split=source -S < %s -o %t
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \
; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/module-split-func-ptr.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/module-split-func-ptr.ll
index 6de99d21b8751..03dcd6d9b3e8e 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/module-split-func-ptr.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/module-split-func-ptr.ll
@@ -7,10 +7,10 @@
; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix=CHECK-IR0
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix=CHECK-IR1
-; CHECK-SYM0: kernel2
+; CHECK-SYM0: kernelA
; CHECK-SYM1: kernel1
;
-; CHECK-IR0: define dso_local spir_kernel void @kernel2
+; CHECK-IR0: define dso_local spir_kernel void @kernelA
;
; CHECK-IR1: @FuncTable = weak global ptr @func
; CHECK-IR1: define {{.*}} i32 @func
@@ -29,7 +29,7 @@ entry:
ret void
}
-define dso_local spir_kernel void @kernel2() #1 {
+define dso_local spir_kernel void @kernelA() #1 {
entry:
ret void
}
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll
index b8d5b2f2e53be..b949ab7530f39 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/one-kernel-per-module.ll
@@ -8,88 +8,82 @@
; RUN: FileCheck %s -input-file=%t.files_2.ll --check-prefixes CHECK-MODULE2,CHECK
; RUN: FileCheck %s -input-file=%t.files_2.sym --check-prefixes CHECK-MODULE2-TXT
-;CHECK-MODULE2-NOT: @GV
-;CHECK-MODULE1-NOT: @GV
;CHECK-MODULE0: @GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
+;CHECK-MODULE1-NOT: @GV
+;CHECK-MODULE2-NOT: @GV
@GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-; CHECK-MODULE2: define dso_local spir_kernel void @TU0_kernel0
-; CHECK-MODULE2-TXT: TU0_kernel0
-; CHECK-MODULE1-NOT: define dso_local spir_kernel void @TU0_kernel0
-; CHECK-MODULE1-TXT-NOT: TU0_kernel0
-
-; CHECK-MODULE2: call spir_func void @foo()
+; CHECK-MODULE0-TXT-NOT: TU0_kernelA
+; CHECK-MODULE1-TXT-NOT: TU0_kernelA
+; CHECK-MODULE2-TXT: TU0_kernelA
-define dso_local spir_kernel void @TU0_kernel0() #0 {
+; CHECK-MODULE0-NOT: define dso_local spir_kernel void @TU0_kernelA
+; CHECK-MODULE1-NOT: define dso_local spir_kernel void @TU0_kernelA
+; CHECK-MODULE2: define dso_local spir_kernel void @TU0_kernelA
+define dso_local spir_kernel void @TU0_kernelA() #0 {
entry:
+; CHECK-MODULE2: call spir_func void @foo()
call spir_func void @foo()
ret void
}
-; CHECK-MODULE2: define {{.*}} spir_func void @foo()
-; CHECK-MODULE1-NOT: define {{.*}} spir_func void @foo()
; CHECK-MODULE0-NOT: define {{.*}} spir_func void @foo()
-
-; CHECK-MODULE2: call spir_func void @bar()
-
+; CHECK-MODULE1-NOT: define {{.*}} spir_func void @foo()
+; CHECK-MODULE2: define {{.*}} spir_func void @foo()
define dso_local spir_func void @foo() {
entry:
+; CHECK-MODULE2: call spir_func void @bar()
call spir_func void @bar()
ret void
}
-; CHECK-MODULE2: define {{.*}} spir_func void @bar()
-; CHECK-MODULE1-NOT: define {{.*}} spir_func void @bar()
; CHECK-MODULE0-NOT: define {{.*}} spir_func void @bar()
-
+; CHECK-MODULE1-NOT: define {{.*}} spir_func void @bar()
+; CHECK-MODULE2: define {{.*}} spir_func void @bar()
define linkonce_odr dso_local spir_func void @bar() {
entry:
ret void
}
-; CHECK-MODULE2-NOT: define dso_local spir_kernel void @TU0_kernel1()
-; CHECK-MODULE2-TXT-NOT: TU0_kernel1
-; CHECK-MODULE1: define dso_local spir_kernel void @TU0_kernel1()
-; CHECK-MODULE1-TXT: TU0_kernel1
-; CHECK-MODULE0-NOT: define dso_local spir_kernel void @TU0_kernel1()
-; CHECK-MODULE0-TXT-NOT: TU0_kernel1
+; CHECK-MODULE0-TXT-NOT: TU0_kernelB
+; CHECK-MODULE1-TXT: TU0_kernelB
+; CHECK-MODULE2-TXT-NOT: TU0_kernelB
-; CHECK-MODULE1: call spir_func void @foo1()
-
-define dso_local spir_kernel void @TU0_kernel1() #0 {
+; CHECK-MODULE0-NOT: define dso_local spir_kernel void @TU0_kernelB()
+; CHECK-MODULE1: define dso_local spir_kernel void @TU0_kernelB()
+; CHECK-MODULE2-NOT: define dso_local spir_kernel void @TU0_kernelB()
+define dso_local spir_kernel void @TU0_kernelB() #0 {
entry:
+; CHECK-MODULE1: call spir_func void @foo1()
call spir_func void @foo1()
ret void
}
-; CHECK-MODULE2-NOT: define {{.*}} spir_func void @foo1()
-; CHECK-MODULE1: define {{.*}} spir_func void @foo1()
; CHECK-MODULE0-NOT: define {{.*}} spir_func void @foo1()
-
+; CHECK-MODULE1: define {{.*}} spir_func void @foo1()
+; CHECK-MODULE2-NOT: define {{.*}} spir_func void @foo1()
define dso_local spir_func void @foo1() {
entry:
ret void
}
-; CHECK-MODULE2-NOT: define dso_local spir_kernel void @TU1_kernel()
-; CHECK-MODULE2-TXT-NOT: TU1_kernel
-; CHECK-MODULE1-NOT: define dso_local spir_kernel void @TU1_kernel()
-; CHECK-MODULE1-TXT-NOT: TU1_kernel
-; CHECK-MODULE0: define dso_local spir_kernel void @TU1_kernel()
; CHECK-MODULE0-TXT: TU1_kernel
+; CHECK-MODULE1-TXT-NOT: TU1_kernel
+; CHECK-MODULE2-TXT-NOT: TU1_kernel
-; CHECK-MODULE0: call spir_func void @foo2()
-
+; CHECK-MODULE0: define dso_local spir_kernel void @TU1_kernel()
+; CHECK-MODULE1-NOT: define dso_local spir_kernel void @TU1_kernel()
+; CHECK-MODULE2-NOT: define dso_local spir_kernel void @TU1_kernel()
define dso_local spir_kernel void @TU1_kernel() #1 {
entry:
+; CHECK-MODULE0: call spir_func void @foo2()
call spir_func void @foo2()
ret void
}
-; CHECK-MODULE2-NOT: define {{.*}} spir_func void @foo2()
-; CHECK-MODULE1-NOT: define {{.*}} spir_func void @foo2()
; CHECK-MODULE0: define {{.*}} spir_func void @foo2()
-
+; CHECK-MODULE1-NOT: define {{.*}} spir_func void @foo2()
+; CHECK-MODULE2-NOT: define {{.*}} spir_func void @foo2()
define dso_local spir_func void @foo2() {
entry:
; CHECK-MODULE0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @GV to ptr addrspace(4)), align 4
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll
index cde7ed9dc37ef..6a4e543209526 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/split-by-source.ll
@@ -11,75 +11,68 @@
; CHECK-TU0: @GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
@GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4
-; CHECK-TU1: define dso_local spir_kernel void @TU1_kernel0
-; CHECK-TU1-TXT: TU1_kernel0
-; CHECK-TU0-NOT: define dso_local spir_kernel void @TU1_kernel0
-; CHECK-TU0-TXT-NOT: TU1_kernel0
+; CHECK-TU0-TXT-NOT: TU1_kernelA
+; CHECK-TU1-TXT: TU1_kernelA
-; CHECK-TU1: call spir_func void @func1_TU1()
-
-define dso_local spir_kernel void @TU1_kernel0() #0 {
+; CHECK-TU0-NOT: define dso_local spir_kernel void @TU1_kernelA
+; CHECK-TU1: define dso_local spir_kernel void @TU1_kernelA
+define dso_local spir_kernel void @TU1_kernelA() #0 {
entry:
+; CHECK-TU1: call spir_func void @func1_TU1()
call spir_func void @func1_TU1()
ret void
}
-; CHECK-TU1: define {{.*}} spir_func void @func1_TU1()
; CHECK-TU0-NOT: define {{.*}} spir_func void @func1_TU1()
-
-; CHECK-TU1: call spir_func void @func2_TU1()
-
+; CHECK-TU1: define {{.*}} spir_func void @func1_TU1()
define dso_local spir_func void @func1_TU1() {
entry:
+; CHECK-TU1: call spir_func void @func2_TU1()
call spir_func void @func2_TU1()
ret void
}
-; CHECK-TU1: define {{.*}} spir_func void @func2_TU1()
; CHECK-TU0-NOT: define {{.*}} spir_func void @func2_TU1()
-
+; CHECK-TU1: define {{.*}} spir_func void @func2_TU1()
define linkonce_odr dso_local spir_func void @func2_TU1() {
entry:
ret void
}
-; CHECK-TU1: define dso_local spir_kernel void @TU1_kernel1()
-; CHECK-TU1-TXT: TU1_kernel1
-; CHECK-TU0-NOT: define dso_local spir_kernel void @TU1_kernel1()
-; CHECK-TU0-TXT-NOT: TU1_kernel1
-; CHECK-TU1: call spir_func void @func3_TU1()
+; CHECK-TU0-TXT-NOT: TU1_kernelB
+; CHECK-TU1-TXT: TU1_kernelB
-define dso_local spir_kernel void @TU1_kernel1() #0 {
+; CHECK-TU0-NOT: define dso_local spir_kernel void @TU1_kernelB()
+; CHECK-TU1: define dso_local spir_kernel void @TU1_kernelB()
+define dso_local spir_kernel void @TU1_kernelB() #0 {
entry:
+; CHECK-TU1: call spir_func void @func3_TU1()
call spir_func void @func3_TU1()
ret void
}
-; CHECK-TU1: define {{.*}} spir_func void @func3_TU1()
; CHECK-TU0-NOT: define {{.*}} spir_func void @func3_TU1()
-
+; CHECK-TU1: define {{.*}} spir_func void @func3_TU1()
define dso_local spir_func void @func3_TU1() {
entry:
ret void
}
-; CHECK-TU1-NOT: define dso_local spir_kernel void @TU0_kernel()
-; CHECK-TU1-TXT-NOT: TU0_kernel
-; CHECK-TU0: define dso_local spir_kernel void @TU0_kernel()
; CHECK-TU0-TXT: TU0_kernel
+; CHECK-TU1-TXT-NOT: TU0_kernel
-; CHECK-TU0: call spir_func void @func_TU0()
-
+; CHECK-TU0: define dso_local spir_kernel void @TU0_kernel()
+; CHECK-TU1-NOT: define dso_local spir_kernel void @TU0_kernel()
define dso_local spir_kernel void @TU0_kernel() #1 {
entry:
+; CHECK-TU0: call spir_func void @func_TU0()
call spir_func void @func_TU0()
ret void
}
-; CHECK-TU1-NOT: define {{.*}} spir_func void @func_TU0()
; CHECK-TU0: define {{.*}} spir_func void @func_TU0()
-
+; CHECK-TU1-NOT: define {{.*}} spir_func void @func_TU0()
define dso_local spir_func void @func_TU0() {
entry:
; CHECK-TU0: %0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @GV to ptr addrspace(4)), align 4
>From e0b2b9cee1046ce571cad75b66e0cbc4f6c398e9 Mon Sep 17 00:00:00 2001
From: "Sabianin, Maksim" <maksim.sabianin at intel.com>
Date: Mon, 10 Mar 2025 08:42:15 -0700
Subject: [PATCH 14/14] fix typo
---
.../SYCL/device-code-split/module-split-func-ptr.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/tools/llvm-split/SYCL/device-code-split/module-split-func-ptr.ll b/llvm/test/tools/llvm-split/SYCL/device-code-split/module-split-func-ptr.ll
index 03dcd6d9b3e8e..8e5853603b35e 100644
--- a/llvm/test/tools/llvm-split/SYCL/device-code-split/module-split-func-ptr.ll
+++ b/llvm/test/tools/llvm-split/SYCL/device-code-split/module-split-func-ptr.ll
@@ -8,7 +8,7 @@
; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix=CHECK-IR1
; CHECK-SYM0: kernelA
-; CHECK-SYM1: kernel1
+; CHECK-SYM1: kernelB
;
; CHECK-IR0: define dso_local spir_kernel void @kernelA
;
@@ -23,7 +23,7 @@ entry:
ret i32 %a
}
-define weak_odr dso_local spir_kernel void @kernel1() #0 {
+define weak_odr dso_local spir_kernel void @kernelB() #0 {
entry:
%0 = call i32 @indirect_call(ptr addrspace(4) addrspacecast ( ptr getelementptr inbounds ( [1 x ptr] , ptr @FuncTable, i64 0, i64 0) to ptr addrspace(4)), i32 0)
ret void
More information about the llvm-commits
mailing list