[Mlir-commits] [mlir] [ROCDL] Pass `amd_code_object_version` when serializing ROCDL gpu module (PR #108874)
Umang Yadav
llvmlistbot at llvm.org
Mon Sep 16 12:10:06 PDT 2024
https://github.com/umangyadav created https://github.com/llvm/llvm-project/pull/108874
This PR adds the ability to pass a non-default value for the `.amdhsa_code_object_version` metadata when serializing ROCDL GPU modules.
It also fixes typos in two places.
>From f2a53b3fcc477b8a965ba3ce792ce7ea1aea5473 Mon Sep 17 00:00:00 2001
From: Umang Yadav <umayadav at amd.com>
Date: Mon, 16 Sep 2024 19:08:23 +0000
Subject: [PATCH] [ROCDL] Pass code_object_version when serializing ROCDL gpu
module, adds unit-tests for the same, fixes a typo on rocDLOps
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 2 +-
mlir/include/mlir/Target/LLVM/ROCDL/Utils.h | 7 ++-
mlir/lib/Target/LLVM/ROCDL/Target.cpp | 13 +++--
.../Target/LLVM/SerializeROCDLTarget.cpp | 56 ++++++++++++++++++-
4 files changed, 68 insertions(+), 10 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index de232462556502..523d719ae336fd 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -693,7 +693,7 @@ def ROCDL_CvtSrFp8F32Op :
// ROCDL target attribute.
//===----------------------------------------------------------------------===//
-def ROCDL_TargettAttr :
+def ROCDL_TargetAttr :
ROCDL_Attr<"ROCDLTarget", "target"> {
let description = [{
ROCDL target attribute for controlling compilation of AMDGPU targets. All
diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
index 3d2174c144815b..9b58c28936981e 100644
--- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
@@ -71,14 +71,17 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
loadBitcodeFiles(llvm::Module &module) override;
- /// Adds `oclc` control variables to the LLVM module.
+ /// Determines required Device Libraries and adds `oclc` control variables to
+ /// the LLVM Module if needed. Also sets
+ /// `amdhsa_code_object_version` module flag
void handleModulePreLink(llvm::Module &module) override;
/// Removes unnecessary metadata from the loaded bitcode files.
LogicalResult handleBitcodeFile(llvm::Module &module) override;
protected:
- /// Adds `oclc` control variables to the LLVM module.
+ /// Adds `oclc` control variables to the LLVM Module if needed and sets
+ /// `amdhsa_code_object_version` module flag
void addControlVariables(llvm::Module &module, AMDGCNLibraries libs,
bool wave64, bool daz, bool finiteOnly,
bool unsafeMath, bool fastMath, bool correctSqrt,
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
index d8a79a7e80d643..227b45133b57e3 100644
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -231,9 +231,6 @@ void SerializeGPUModuleBase::addControlVariables(
llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz,
bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt,
StringRef abiVer) {
- // Return if no device libraries are required.
- if (libs == AMDGCNLibraries::None)
- return;
// Helper function for adding control variables.
auto addControlVariable = [&module](StringRef name, uint32_t value,
uint32_t bitwidth) {
@@ -252,6 +249,13 @@ void SerializeGPUModuleBase::addControlVariables(
controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
};
+
+ int abi = 500;
+ abiVer.getAsInteger(0, abi);
+ module.addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", abi);
+ // Return if no device libraries are required.
+ if (libs == AMDGCNLibraries::None)
+ return;
// Add ocml related control variables.
if (any(libs & AMDGCNLibraries::Ocml)) {
addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8);
@@ -263,7 +267,6 @@ void SerializeGPUModuleBase::addControlVariables(
// Add ocml or ockl related control variables.
if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) {
addControlVariable("__oclc_wavefrontsize64", wave64, 8);
-
// Get the ISA version.
llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(chip);
// Add the ISA control variable.
@@ -271,8 +274,6 @@ void SerializeGPUModuleBase::addControlVariables(
isaVersion.Minor + 100 * isaVersion.Stepping +
1000 * isaVersion.Major,
32);
- int abi = 500;
- abiVer.getAsInteger(0, abi);
addControlVariable("__oclc_ABI_version", abi, 32);
}
}
diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
index 43fa3d850d9e29..0ff766112aef7f 100644
--- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
@@ -70,7 +70,7 @@ class MLIRTargetLLVMROCDL : public ::testing::Test {
};
// Test ROCDL serialization to LLVM.
-TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLMToLLVM)) {
+TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToLLVM)) {
MLIRContext context(registry);
OwningOpRef<ModuleOp> module =
@@ -104,6 +104,60 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLMToLLVM)) {
ASSERT_TRUE((*llvmModule)->getFunction("rocdl_kernel") != nullptr);
}
}
+// Test ROCDL serialization to ISA with default code object version.
+TEST_F(MLIRTargetLLVMROCDL,
+ SKIP_WITHOUT_AMDGPU(SerializeROCDLToISAWithDefaultCOV)) {
+ MLIRContext context(registry);
+
+ OwningOpRef<ModuleOp> module =
+ parseSourceString<ModuleOp>(moduleStr, &context);
+ ASSERT_TRUE(!!module);
+
+ // Create a ROCDL target.
+ ROCDL::ROCDLTargetAttr target = ROCDL::ROCDLTargetAttr::get(&context);
+
+ // Serialize the module.
+ auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+ ASSERT_TRUE(!!serializer);
+ gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Assembly);
+ for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+ std::optional<SmallVector<char, 0>> object =
+ serializer.serializeToObject(gpuModule, options);
+ // Check that the serializer was successful.
+ ASSERT_TRUE(object != std::nullopt);
+ ASSERT_TRUE(!object->empty());
+ ASSERT_TRUE(StringRef(object->data(), object->size())
+ .contains(".amdhsa_code_object_version 5"));
+ }
+}
+
+// Test ROCDL serialization to ISA with non-default code object version.
+TEST_F(MLIRTargetLLVMROCDL,
+ SKIP_WITHOUT_AMDGPU(SerializeROCDLToISAWithNonDefaultCOV)) {
+ MLIRContext context(registry);
+
+ OwningOpRef<ModuleOp> module =
+ parseSourceString<ModuleOp>(moduleStr, &context);
+ ASSERT_TRUE(!!module);
+
+ // Create a ROCDL target.
+ ROCDL::ROCDLTargetAttr target = ROCDL::ROCDLTargetAttr::get(
+ &context, 2, "amdgcn-amd-amdhsa", "gfx900", "", "400");
+
+ // Serialize the module.
+ auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+ ASSERT_TRUE(!!serializer);
+ gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Assembly);
+ for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+ std::optional<SmallVector<char, 0>> object =
+ serializer.serializeToObject(gpuModule, options);
+ // Check that the serializer was successful.
+ ASSERT_TRUE(object != std::nullopt);
+ ASSERT_TRUE(!object->empty());
+ ASSERT_TRUE(StringRef(object->data(), object->size())
+ .contains(".amdhsa_code_object_version 4"));
+ }
+}
// Test ROCDL serialization to PTX.
TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToPTX)) {
More information about the Mlir-commits
mailing list