[Mlir-commits] [mlir] [mlir][Target] Improve ROCDL gpu serialization API (PR #95456)
Fabian Mora
llvmlistbot at llvm.org
Thu Jun 13 12:39:49 PDT 2024
https://github.com/fabianmcg updated https://github.com/llvm/llvm-project/pull/95456
>From 46e97cf36472e1c609487e102809a10cca8d2d1a Mon Sep 17 00:00:00 2001
From: Fabian Mora <fmora.dev at gmail.com>
Date: Thu, 13 Jun 2024 18:54:29 +0000
Subject: [PATCH] [mlir][Target] ROCDL
---
mlir/include/mlir/Target/LLVM/ROCDL/Utils.h | 86 +++++-
mlir/lib/Target/LLVM/ROCDL/Target.cpp | 282 +++++++++++---------
2 files changed, 239 insertions(+), 129 deletions(-)
diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
index 374fa65bd02e3..acbdb06be3f67 100644
--- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
@@ -27,6 +27,64 @@ namespace ROCDL {
/// 5. Returns an empty string.
StringRef getROCMPath();
+/// Helper class for specifying the AMD GCN device libraries required for
+/// compilation.
+class AMDGCNLibraryList {
+public:
+ typedef enum : uint32_t {
+ None = 0,
+ Ockl = 1,
+ Ocml = 2,
+ OpenCL = 4,
+ Hip = 8,
+ LastLib = Hip,
+ All = (LastLib << 1) - 1
+ } Library;
+
+ explicit AMDGCNLibraryList(uint32_t libs = All) : libList(All & libs) {}
+
+ /// Return a list with no libraries.
+ static AMDGCNLibraryList getEmpty() { return AMDGCNLibraryList(None); }
+
+ /// Return the libraries needed for compiling code with OpenCL calls.
+ static AMDGCNLibraryList getOpenCL() {
+ return AMDGCNLibraryList(Ockl | Ocml | OpenCL);
+ }
+
+ /// Returns true if the list is empty.
+ bool isEmpty() const { return libList == None; }
+
+ /// Adds a library to the list.
+ AMDGCNLibraryList addLibrary(Library lib) {
+ libList = libList | lib;
+ return *this;
+ }
+
+ /// Adds all the libraries in `list` to the library list.
+ AMDGCNLibraryList addList(AMDGCNLibraryList list) {
+ libList = libList | list.libList;
+ return *this;
+ }
+
+ /// Removes a library from the list.
+ AMDGCNLibraryList removeLibrary(Library lib) {
+ libList = libList & ~lib;
+ return *this;
+ }
+
+ /// Returns true if `lib` is in the list of libraries.
+ bool requiresLibrary(Library lib) const { return (libList & lib) != None; }
+
+ /// Returns true if `libList` contains any of the libraries in `libs`.
+ bool containLibraries(uint32_t libs) const {
+ return (libList & libs) != None;
+ }
+
+private:
+ /// Library list.
+ uint32_t libList;
+};
+
/// Base class for all ROCDL serializations from GPU modules into binary
/// strings. By default this class serializes into LLVM bitcode.
class SerializeGPUModuleBase : public LLVM::ModuleToObject {
@@ -49,8 +107,8 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
/// Returns the bitcode files to be loaded.
ArrayRef<std::string> getFileList() const;
- /// Appends standard ROCm device libraries like `ocml.bc`, `ockl.bc`, etc.
- LogicalResult appendStandardLibs();
+ /// Appends the standard ROCm device libraries in `libs` to `fileList`.
+ LogicalResult appendStandardLibs(AMDGCNLibraryList libs);
/// Loads the bitcode files in `fileList`.
virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
@@ -63,15 +121,20 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
LogicalResult handleBitcodeFile(llvm::Module &module) override;
protected:
- /// Appends the paths of common ROCm device libraries to `libs`.
- LogicalResult getCommonBitcodeLibs(llvm::SmallVector<std::string> &libs,
- SmallVector<char, 256> &libPath,
- StringRef isaVersion);
-
/// Adds `oclc` control variables to the LLVM module.
- void addControlVariables(llvm::Module &module, bool wave64, bool daz,
- bool finiteOnly, bool unsafeMath, bool fastMath,
- bool correctSqrt, StringRef abiVer);
+ void addControlVariables(llvm::Module &module, AMDGCNLibraryList libs,
+ bool wave64, bool daz, bool finiteOnly,
+ bool unsafeMath, bool fastMath, bool correctSqrt,
+ StringRef abiVer);
+
+ /// Compiles assembly to a binary.
+ virtual std::optional<SmallVector<char, 0>>
+ compileToBinary(const std::string &serializedISA);
+
+ /// Default implementation of `ModuleToObject::moduleToObject`.
+ std::optional<SmallVector<char, 0>>
+ moduleToObjectImpl(const gpu::TargetOptions &targetOptions,
+ llvm::Module &llvmModule);
/// Returns the assembled ISA.
std::optional<SmallVector<char, 0>> assembleIsa(StringRef isa);
@@ -84,6 +147,9 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
/// List of LLVM bitcode files to link to.
SmallVector<std::string> fileList;
+
+ /// AMD GCN libraries to use when linking, the default is an empty list.
+ AMDGCNLibraryList deviceLibs = AMDGCNLibraryList::getEmpty();
};
} // namespace ROCDL
} // namespace mlir
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
index cc13e5b7436ea..435d84454983b 100644
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -17,9 +17,6 @@
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Target/LLVM/ROCDL/Utils.h"
-#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/IR/Constants.h"
@@ -112,8 +109,9 @@ SerializeGPUModuleBase::SerializeGPUModuleBase(
if (auto file = dyn_cast<StringAttr>(attr))
fileList.push_back(file.str());
- // Append standard ROCm device bitcode libraries to the files to be loaded.
- (void)appendStandardLibs();
+ // By default add all libraries if the toolkit path is not empty.
+ if (!getToolkitPath().empty())
+ deviceLibs = AMDGCNLibraryList(AMDGCNLibraryList::All);
}
void SerializeGPUModuleBase::init() {
@@ -138,29 +136,70 @@ ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
return fileList;
}
-LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
+LogicalResult
+SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraryList libs) {
+ if (libs.isEmpty())
+ return success();
StringRef pathRef = getToolkitPath();
- if (!pathRef.empty()) {
- SmallVector<char, 256> path;
- path.insert(path.begin(), pathRef.begin(), pathRef.end());
- llvm::sys::path::append(path, "amdgcn", "bitcode");
- pathRef = StringRef(path.data(), path.size());
- if (!llvm::sys::fs::is_directory(pathRef)) {
- getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef
- << " does not exist or is not a directory.";
- return failure();
- }
- StringRef isaVersion =
- llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip));
- isaVersion.consume_front("gfx");
- return getCommonBitcodeLibs(fileList, path, isaVersion);
+ // Nothing to append if the toolkit path is empty.
+ if (pathRef.empty())
+ return success();
+
+ // Get the path for the device libraries
+ SmallString<256> path;
+ path.insert(path.begin(), pathRef.begin(), pathRef.end());
+ llvm::sys::path::append(path, "amdgcn", "bitcode");
+ pathRef = StringRef(path.data(), path.size());
+
+ // Fail if the path is invalid.
+ if (!llvm::sys::fs::is_directory(pathRef)) {
+ getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef
+ << " does not exist or is not a directory.";
+ return failure();
}
+
+ // Get the ISA version.
+ StringRef isaVersion =
+ llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip));
+ isaVersion.consume_front("gfx");
+
+ // Helper function for adding a library.
+ auto addLib = [&](const Twine &lib) -> bool {
+ auto baseSize = path.size();
+ llvm::sys::path::append(path, lib);
+ StringRef pathRef(path.data(), path.size());
+ if (!llvm::sys::fs::is_regular_file(pathRef)) {
+ getOperation().emitRemark() << "Bitcode library path: " << pathRef
+ << " does not exist or is not a file.\n";
+ return true;
+ }
+ fileList.push_back(pathRef.str());
+ path.truncate(baseSize);
+ return false;
+ };
+
+ // Add ROCm device libraries. Fail if any of the libraries is not found, i.e.
+ // if any of the `addLib` failed.
+ if ((libs.requiresLibrary(AMDGCNLibraryList::Ocml) && addLib("ocml.bc")) ||
+ (libs.requiresLibrary(AMDGCNLibraryList::Ockl) && addLib("ockl.bc")) ||
+ (libs.requiresLibrary(AMDGCNLibraryList::Hip) && addLib("hip.bc")) ||
+ (libs.requiresLibrary(AMDGCNLibraryList::OpenCL) &&
+ addLib("opencl.bc")) ||
+ (libs.containLibraries(AMDGCNLibraryList::Ocml |
+ AMDGCNLibraryList::Ockl) &&
+ addLib("oclc_isa_version_" + isaVersion + ".bc")))
+ return failure();
return success();
}
std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
+ // Return if there are no libs to load.
+ if (deviceLibs.isEmpty() && fileList.empty())
+ return bcFiles;
+ if (failed(appendStandardLibs(deviceLibs)))
+ return std::nullopt;
if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles,
true)))
return std::nullopt;
@@ -174,80 +213,79 @@ LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) {
// Stop spamming us with clang version numbers
if (auto *ident = module.getNamedMetadata("llvm.ident"))
module.eraseNamedMetadata(ident);
+ // Override the libModules datalayout and target triple with the compiler's
+ // data layout should there be a discrepancy.
+ setDataLayoutAndTriple(module);
return success();
}
void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) {
- [[maybe_unused]] std::optional<llvm::TargetMachine *> targetMachine =
+ std::optional<llvm::TargetMachine *> targetMachine =
getOrCreateTargetMachine();
assert(targetMachine && "expect a TargetMachine");
- addControlVariables(module, target.hasWave64(), target.hasDaz(),
+ // If no library is set (`requiresLibrary(All)` is true when any library is
+ // set), traverse the module to determine which libraries are required.
+ if (!deviceLibs.requiresLibrary(AMDGCNLibraryList::All)) {
+ for (llvm::Function &f : module.functions()) {
+ if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
+ StringRef funcName = f.getName();
+ if ("printf" == funcName)
+ deviceLibs.addList(AMDGCNLibraryList::getOpenCL());
+ if (funcName.starts_with("__ockl_"))
+ deviceLibs.addLibrary(AMDGCNLibraryList::Ockl);
+ if (funcName.starts_with("__ocml_"))
+ deviceLibs.addLibrary(AMDGCNLibraryList::Ocml);
+ }
+ }
+ }
+ addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(),
target.hasFiniteOnly(), target.hasUnsafeMath(),
target.hasFastMath(), target.hasCorrectSqrt(),
target.getAbi());
}
-// Get the paths of ROCm device libraries.
-LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs(
- llvm::SmallVector<std::string> &libs, SmallVector<char, 256> &libPath,
- StringRef isaVersion) {
- auto addLib = [&](StringRef path) -> bool {
- if (!llvm::sys::fs::is_regular_file(path)) {
- getOperation().emitRemark() << "Bitcode library path: " << path
- << " does not exist or is not a file.\n";
- return true;
- }
- libs.push_back(path.str());
- return false;
- };
- auto getLibPath = [&libPath](Twine lib) {
- auto baseSize = libPath.size();
- llvm::sys::path::append(libPath, lib + ".bc");
- std::string path(StringRef(libPath.data(), libPath.size()).str());
- libPath.truncate(baseSize);
- return path;
- };
-
- // Add ROCm device libraries. Fail if any of the libraries is not found.
- if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) ||
- addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) ||
- addLib(getLibPath("oclc_isa_version_" + isaVersion)))
- return failure();
- return success();
-}
-
void SerializeGPUModuleBase::addControlVariables(
- llvm::Module &module, bool wave64, bool daz, bool finiteOnly,
- bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) {
- llvm::Type *i8Ty = llvm::Type::getInt8Ty(module.getContext());
- auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) {
+ llvm::Module &module, AMDGCNLibraryList libs, bool wave64, bool daz,
+ bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt,
+ StringRef abiVer) {
+ // Return if no device libraries are required.
+ if (libs.isEmpty())
+ return;
+ // Helper function for adding control variables.
+ auto addControlVariable = [&module](StringRef name, uint32_t value,
+ uint32_t bitwidth) {
+ if (module.getNamedGlobal(name)) {
+ return;
+ }
+ llvm::IntegerType *type =
+ llvm::IntegerType::getIntNTy(module.getContext(), bitwidth);
llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable(
- module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
- llvm::ConstantInt::get(i8Ty, enable), name, nullptr,
- llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
+ module, type, /*isConstant=*/true,
+ llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
+ llvm::ConstantInt::get(type, value), name, /*before=*/nullptr,
+ /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
+ /*addressSpace=*/4);
controlVariable->setVisibility(
llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
- controlVariable->setAlignment(llvm::MaybeAlign(1));
+ controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
};
- addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath);
- addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath);
- addControlVariable("__oclc_daz_opt", daz || fastMath);
- addControlVariable("__oclc_correctly_rounded_sqrt32",
- correctSqrt && !fastMath);
- addControlVariable("__oclc_wavefrontsize64", wave64);
-
- llvm::Type *i32Ty = llvm::Type::getInt32Ty(module.getContext());
- int abi = 500;
- abiVer.getAsInteger(0, abi);
- llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable(
- module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
- llvm::ConstantInt::get(i32Ty, abi), "__oclc_ABI_version", nullptr,
- llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
- abiVersion->setVisibility(
- llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
- abiVersion->setAlignment(llvm::MaybeAlign(4));
- abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
+ // Add ocml related control variables.
+ if (libs.requiresLibrary(AMDGCNLibraryList::Ocml)) {
+ addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8);
+ addControlVariable("__oclc_daz_opt", daz || fastMath, 8);
+ addControlVariable("__oclc_correctly_rounded_sqrt32",
+ correctSqrt && !fastMath, 8);
+ addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8);
+ }
+ // Add ocml or ockl related control variables.
+ if (libs.containLibraries(AMDGCNLibraryList::Ocml |
+ AMDGCNLibraryList::Ockl)) {
+ addControlVariable("__oclc_wavefrontsize64", wave64, 8);
+ int abi = 500;
+ abiVer.getAsInteger(0, abi);
+ addControlVariable("__oclc_ABI_version", abi, 32);
+ }
}
std::optional<SmallVector<char, 0>>
@@ -312,43 +350,11 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) {
parser->setTargetParser(*tap);
parser->Run(false);
-
return result;
}
-#if MLIR_ENABLE_ROCM_CONVERSIONS
-namespace {
-class AMDGPUSerializer : public SerializeGPUModuleBase {
-public:
- AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
- const gpu::TargetOptions &targetOptions);
-
- gpu::GPUModuleOp getOperation();
-
- // Compile to HSA.
- std::optional<SmallVector<char, 0>>
- compileToBinary(const std::string &serializedISA);
-
- std::optional<SmallVector<char, 0>>
- moduleToObject(llvm::Module &llvmModule) override;
-
-private:
- // Target options.
- gpu::TargetOptions targetOptions;
-};
-} // namespace
-
-AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
- const gpu::TargetOptions &targetOptions)
- : SerializeGPUModuleBase(module, target, targetOptions),
- targetOptions(targetOptions) {}
-
-gpu::GPUModuleOp AMDGPUSerializer::getOperation() {
- return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
-}
-
std::optional<SmallVector<char, 0>>
-AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
+SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
// Assemble the ISA.
std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA);
@@ -407,13 +413,13 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
return SmallVector<char, 0>(buffer.begin(), buffer.end());
}
-std::optional<SmallVector<char, 0>>
-AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
+std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
+ const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) {
// Return LLVM IR if the compilation target is offload.
#define DEBUG_TYPE "serialize-to-llvm"
LLVM_DEBUG({
- llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
- << "\n"
+ llvm::dbgs() << "LLVM IR for module: "
+ << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
<< llvmModule << "\n";
});
#undef DEBUG_TYPE
@@ -437,7 +443,8 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
}
#define DEBUG_TYPE "serialize-to-isa"
LLVM_DEBUG({
- llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n"
+ llvm::dbgs() << "ISA for module: "
+ << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
<< *serializedISA << "\n";
});
#undef DEBUG_TYPE
@@ -448,6 +455,38 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
// Compile to binary.
return compileToBinary(*serializedISA);
}
+
+#if MLIR_ENABLE_ROCM_CONVERSIONS
+namespace {
+class AMDGPUSerializer : public SerializeGPUModuleBase {
+public:
+ AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
+ const gpu::TargetOptions &targetOptions);
+
+ gpu::GPUModuleOp getOperation();
+
+ std::optional<SmallVector<char, 0>>
+ moduleToObject(llvm::Module &llvmModule) override;
+
+private:
+ // Target options.
+ gpu::TargetOptions targetOptions;
+};
+} // namespace
+
+AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
+ const gpu::TargetOptions &targetOptions)
+ : SerializeGPUModuleBase(module, target, targetOptions),
+ targetOptions(targetOptions) {}
+
+gpu::GPUModuleOp AMDGPUSerializer::getOperation() {
+ return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
+}
+
+std::optional<SmallVector<char, 0>>
+AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
+ return moduleToObjectImpl(targetOptions, llvmModule);
+}
#endif // MLIR_ENABLE_ROCM_CONVERSIONS
std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
@@ -477,10 +516,15 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute,
const SmallVector<char, 0> &object,
const gpu::TargetOptions &options) const {
gpu::CompilationTarget format = options.getCompilationTarget();
+ // If format is `fatbin` transform it to binary as `fatbin` is not yet
+ // supported.
+ if (format > gpu::CompilationTarget::Binary)
+ format = gpu::CompilationTarget::Binary;
+
+ DictionaryAttr properties{};
Builder builder(attribute.getContext());
return builder.getAttr<gpu::ObjectAttr>(
- attribute,
- format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary
- : format,
- builder.getStringAttr(StringRef(object.data(), object.size())), nullptr);
+ attribute, format,
+ builder.getStringAttr(StringRef(object.data(), object.size())),
+ properties);
}
More information about the Mlir-commits
mailing list