[Mlir-commits] [mlir] Reland [mlir][Target] Improve ROCDL gpu serialization API (PR #96198)

Thu Jun 20 07:36:22 PDT 2024

https://github.com/fabianmcg created https://github.com/llvm/llvm-project/pull/96198

Reland: https://github.com/llvm/llvm-project/pull/95456

This patch improves the ROCDL gpu serialization API by:
- Introducing the enum `AMDGCNLibraries` for specifying the AMD GCN device code libraries to use during linking.
- Removing `getCommonBitcodeLibs` in favor of `AMDGCNLibraries`. Previously `getCommonBitcodeLibs` would try to load all AMD GCN bitcode librariesm now it will only load the requested libraries.
- Exposing the `compileToBinary` method and making it virtual, allowing downstream users to re-use this method.
- Exposing `moduleToObjectImpl`, this method provides a prototype flow for compiling to binary, allowing downstream users to re-use this method.
- It also avoids constructing the control variables if no device libraries are being used.
- Changes the style of the error messages to be composable, ie no full stops.
- Adds an error message for when the ROCm toolkit can't be found but it was required.

>From d81439fde7b2d986a03ea59de829baecaa0a7185 Mon Sep 17 00:00:00 2001
From: Fabian Mora <fmora.dev at gmail.com>
Date: Thu, 13 Jun 2024 18:54:29 +0000
Subject: [PATCH] [mlir][Target] Improve ROCDL serialization

---
 mlir/include/mlir/Target/LLVM/ROCDL/Utils.h |  41 ++-
 mlir/lib/Target/LLVM/CMakeLists.txt         |  13 +-
 mlir/lib/Target/LLVM/ROCDL/Target.cpp       | 302 +++++++++++---------
 3 files changed, 206 insertions(+), 150 deletions(-)

diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
index 374fa65bd02e3..44c9ded317fa5 100644
--- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
@@ -27,6 +27,19 @@ namespace ROCDL {
 /// 5. Returns an empty string.
 StringRef getROCMPath();
 
+/// Helper enum for specifying the AMD GCN device libraries required for
+/// compilation.
+enum class AMDGCNLibraries : uint32_t {
+  None = 0,
+  Ockl = 1,
+  Ocml = 2,
+  OpenCL = 4,
+  Hip = 8,
+  LastLib = Hip,
+  LLVM_MARK_AS_BITMASK_ENUM(LastLib),
+  All = (LastLib << 1) - 1
+};
+
 /// Base class for all ROCDL serializations from GPU modules into binary
 /// strings. By default this class serializes into LLVM bitcode.
 class SerializeGPUModuleBase : public LLVM::ModuleToObject {
@@ -49,8 +62,8 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
   /// Returns the bitcode files to be loaded.
   ArrayRef<std::string> getFileList() const;
 
-  /// Appends standard ROCm device libraries like `ocml.bc`, `ockl.bc`, etc.
-  LogicalResult appendStandardLibs();
+  /// Appends standard ROCm device libraries to `fileList`.
+  LogicalResult appendStandardLibs(AMDGCNLibraries libs);
 
   /// Loads the bitcode files in `fileList`.
   virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
@@ -63,15 +76,20 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
   LogicalResult handleBitcodeFile(llvm::Module &module) override;
 
 protected:
-  /// Appends the paths of common ROCm device libraries to `libs`.
-  LogicalResult getCommonBitcodeLibs(llvm::SmallVector<std::string> &libs,
-                                     SmallVector<char, 256> &libPath,
-                                     StringRef isaVersion);
-
   /// Adds `oclc` control variables to the LLVM module.
-  void addControlVariables(llvm::Module &module, bool wave64, bool daz,
-                           bool finiteOnly, bool unsafeMath, bool fastMath,
-                           bool correctSqrt, StringRef abiVer);
+  void addControlVariables(llvm::Module &module, AMDGCNLibraries libs,
+                           bool wave64, bool daz, bool finiteOnly,
+                           bool unsafeMath, bool fastMath, bool correctSqrt,
+                           StringRef abiVer);
+
+  /// Compiles assembly to a binary.
+  virtual std::optional<SmallVector<char, 0>>
+  compileToBinary(const std::string &serializedISA);
+
+  /// Default implementation of `ModuleToObject::moduleToObject`.
+  std::optional<SmallVector<char, 0>>
+  moduleToObjectImpl(const gpu::TargetOptions &targetOptions,
+                     llvm::Module &llvmModule);
 
   /// Returns the assembled ISA.
   std::optional<SmallVector<char, 0>> assembleIsa(StringRef isa);
@@ -84,6 +102,9 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
 
   /// List of LLVM bitcode files to link to.
   SmallVector<std::string> fileList;
+
+  /// AMD GCN libraries to use when linking, the default is using none.
+  AMDGCNLibraries deviceLibs = AMDGCNLibraries::None;
 };
 } // namespace ROCDL
 } // namespace mlir
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 1c709e18cbfda..93dc5ff9d35b7 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -127,17 +127,18 @@ add_mlir_dialect_library(MLIRROCDLTarget
   )
 
 if(MLIR_ENABLE_ROCM_CONVERSIONS)
-  if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
-    message(SEND_ERROR
-      "Building mlir with ROCm support requires the AMDGPU backend")
-  endif()
-
   if (DEFINED ROCM_PATH)
     set(DEFAULT_ROCM_PATH "${ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs")
   elseif(DEFINED ENV{ROCM_PATH})
     set(DEFAULT_ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs")
   else()
-    set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs")
+    if (WIN32)
+      # Avoid setting an UNIX path for Windows.
+      # TODO: Eventually migrate to FindHIP once it becomes a part of CMake.
+      set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs")
+    else()
+      set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs")
+    endif()
   endif()
   message(VERBOSE "MLIR Default ROCM toolkit path: ${DEFAULT_ROCM_PATH}")
 
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
index cc13e5b7436ea..4ca8864555923 100644
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -17,9 +17,6 @@
 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
 #include "mlir/Support/FileUtilities.h"
 #include "mlir/Target/LLVM/ROCDL/Utils.h"
-#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 
 #include "llvm/IR/Constants.h"
@@ -112,8 +109,9 @@ SerializeGPUModuleBase::SerializeGPUModuleBase(
       if (auto file = dyn_cast<StringAttr>(attr))
         fileList.push_back(file.str());
 
-  // Append standard ROCm device bitcode libraries to the files to be loaded.
-  (void)appendStandardLibs();
+  // By default add all libraries if the toolkit path is not empty.
+  if (!getToolkitPath().empty())
+    deviceLibs = AMDGCNLibraries::All;
 }
 
 void SerializeGPUModuleBase::init() {
@@ -138,28 +136,63 @@ ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
   return fileList;
 }
 
-LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
+LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) {
+  if (libs == AMDGCNLibraries::None)
+    return success();
   StringRef pathRef = getToolkitPath();
-  if (!pathRef.empty()) {
-    SmallVector<char, 256> path;
-    path.insert(path.begin(), pathRef.begin(), pathRef.end());
-    llvm::sys::path::append(path, "amdgcn", "bitcode");
-    pathRef = StringRef(path.data(), path.size());
-    if (!llvm::sys::fs::is_directory(pathRef)) {
-      getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef
-                                  << " does not exist or is not a directory.";
-      return failure();
-    }
-    StringRef isaVersion =
-        llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip));
-    isaVersion.consume_front("gfx");
-    return getCommonBitcodeLibs(fileList, path, isaVersion);
+
+  // Get the path for the device libraries
+  SmallString<256> path;
+  path.insert(path.begin(), pathRef.begin(), pathRef.end());
+  llvm::sys::path::append(path, "amdgcn", "bitcode");
+  pathRef = StringRef(path.data(), path.size());
+
+  // Fail if the path is invalid.
+  if (!llvm::sys::fs::is_directory(pathRef)) {
+    getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef
+                                << " does not exist or is not a directory";
+    return failure();
   }
+
+  // Get the ISA version.
+  StringRef isaVersion =
+      llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip));
+  isaVersion.consume_front("gfx");
+
+  // Helper function for adding a library.
+  auto addLib = [&](const Twine &lib) -> bool {
+    auto baseSize = path.size();
+    llvm::sys::path::append(path, lib);
+    StringRef pathRef(path.data(), path.size());
+    if (!llvm::sys::fs::is_regular_file(pathRef)) {
+      getOperation().emitRemark() << "bitcode library path: " << pathRef
+                                  << " does not exist or is not a file";
+      return true;
+    }
+    fileList.push_back(pathRef.str());
+    path.truncate(baseSize);
+    return false;
+  };
+
+  // Add ROCm device libraries. Fail if any of the libraries is not found, ie.
+  // if any of the `addLib` failed.
+  if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) ||
+      (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) ||
+      (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) ||
+      (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc")) ||
+      (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl)) &&
+       addLib("oclc_isa_version_" + isaVersion + ".bc")))
+    return failure();
   return success();
 }
 
 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
+  // Return if there are no libs to load.
+  if (deviceLibs == AMDGCNLibraries::None && fileList.empty())
+    return SmallVector<std::unique_ptr<llvm::Module>>();
+  if (failed(appendStandardLibs(deviceLibs)))
+    return std::nullopt;
   SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
   if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles,
                                       true)))
@@ -174,80 +207,75 @@ LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) {
   // Stop spamming us with clang version numbers
   if (auto *ident = module.getNamedMetadata("llvm.ident"))
     module.eraseNamedMetadata(ident);
+  // Override the libModules datalayout and target triple with the compiler's
+  // data layout should there be a discrepency.
+  setDataLayoutAndTriple(module);
   return success();
 }
 
 void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) {
-  [[maybe_unused]] std::optional<llvm::TargetMachine *> targetMachine =
-      getOrCreateTargetMachine();
-  assert(targetMachine && "expect a TargetMachine");
-  addControlVariables(module, target.hasWave64(), target.hasDaz(),
+  // If all libraries are not set, traverse the module to determine which
+  // libraries are required.
+  if (deviceLibs != AMDGCNLibraries::All) {
+    for (llvm::Function &f : module.functions()) {
+      if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
+        StringRef funcName = f.getName();
+        if ("printf" == funcName)
+          deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl |
+                        AMDGCNLibraries::Ocml;
+        if (funcName.starts_with("__ockl_"))
+          deviceLibs |= AMDGCNLibraries::Ockl;
+        if (funcName.starts_with("__ocml_"))
+          deviceLibs |= AMDGCNLibraries::Ocml;
+      }
+    }
+  }
+  addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(),
                       target.hasFiniteOnly(), target.hasUnsafeMath(),
                       target.hasFastMath(), target.hasCorrectSqrt(),
                       target.getAbi());
 }
 
-// Get the paths of ROCm device libraries.
-LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs(
-    llvm::SmallVector<std::string> &libs, SmallVector<char, 256> &libPath,
-    StringRef isaVersion) {
-  auto addLib = [&](StringRef path) -> bool {
-    if (!llvm::sys::fs::is_regular_file(path)) {
-      getOperation().emitRemark() << "Bitcode library path: " << path
-                                  << " does not exist or is not a file.\n";
-      return true;
-    }
-    libs.push_back(path.str());
-    return false;
-  };
-  auto getLibPath = [&libPath](Twine lib) {
-    auto baseSize = libPath.size();
-    llvm::sys::path::append(libPath, lib + ".bc");
-    std::string path(StringRef(libPath.data(), libPath.size()).str());
-    libPath.truncate(baseSize);
-    return path;
-  };
-
-  // Add ROCm device libraries. Fail if any of the libraries is not found.
-  if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) ||
-      addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) ||
-      addLib(getLibPath("oclc_isa_version_" + isaVersion)))
-    return failure();
-  return success();
-}
-
 void SerializeGPUModuleBase::addControlVariables(
-    llvm::Module &module, bool wave64, bool daz, bool finiteOnly,
-    bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) {
-  llvm::Type *i8Ty = llvm::Type::getInt8Ty(module.getContext());
-  auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) {
+    llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz,
+    bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt,
+    StringRef abiVer) {
+  // Return if no device libraries are required.
+  if (libs == AMDGCNLibraries::None)
+    return;
+  // Helper function for adding control variables.
+  auto addControlVariable = [&module](StringRef name, uint32_t value,
+                                      uint32_t bitwidth) {
+    if (module.getNamedGlobal(name))
+      return;
+    llvm::IntegerType *type =
+        llvm::IntegerType::getIntNTy(module.getContext(), bitwidth);
     llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable(
-        module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
-        llvm::ConstantInt::get(i8Ty, enable), name, nullptr,
-        llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
+        module, /*isConstant=*/type, true,
+        llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
+        llvm::ConstantInt::get(type, value), name, /*before=*/nullptr,
+        /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
+        /*addressSpace=*/4);
     controlVariable->setVisibility(
         llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
-    controlVariable->setAlignment(llvm::MaybeAlign(1));
+    controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
     controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
   };
-  addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath);
-  addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath);
-  addControlVariable("__oclc_daz_opt", daz || fastMath);
-  addControlVariable("__oclc_correctly_rounded_sqrt32",
-                     correctSqrt && !fastMath);
-  addControlVariable("__oclc_wavefrontsize64", wave64);
-
-  llvm::Type *i32Ty = llvm::Type::getInt32Ty(module.getContext());
-  int abi = 500;
-  abiVer.getAsInteger(0, abi);
-  llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable(
-      module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
-      llvm::ConstantInt::get(i32Ty, abi), "__oclc_ABI_version", nullptr,
-      llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
-  abiVersion->setVisibility(
-      llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
-  abiVersion->setAlignment(llvm::MaybeAlign(4));
-  abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
+  // Add ocml related control variables.
+  if (any(libs & AMDGCNLibraries::Ocml)) {
+    addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8);
+    addControlVariable("__oclc_daz_opt", daz || fastMath, 8);
+    addControlVariable("__oclc_correctly_rounded_sqrt32",
+                       correctSqrt && !fastMath, 8);
+    addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8);
+  }
+  // Add ocml or ockl related control variables.
+  if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) {
+    addControlVariable("__oclc_wavefrontsize64", wave64, 8);
+    int abi = 500;
+    abiVer.getAsInteger(0, abi);
+    addControlVariable("__oclc_ABI_version", abi, 32);
+  }
 }
 
 std::optional<SmallVector<char, 0>>
@@ -307,53 +335,21 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) {
 
   if (!tap) {
     emitError(loc, "assembler initialization error");
-    return {};
+    return std::nullopt;
   }
 
   parser->setTargetParser(*tap);
   parser->Run(false);
-
-  return result;
-}
-
-#if MLIR_ENABLE_ROCM_CONVERSIONS
-namespace {
-class AMDGPUSerializer : public SerializeGPUModuleBase {
-public:
-  AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
-                   const gpu::TargetOptions &targetOptions);
-
-  gpu::GPUModuleOp getOperation();
-
-  // Compile to HSA.
-  std::optional<SmallVector<char, 0>>
-  compileToBinary(const std::string &serializedISA);
-
-  std::optional<SmallVector<char, 0>>
-  moduleToObject(llvm::Module &llvmModule) override;
-
-private:
-  // Target options.
-  gpu::TargetOptions targetOptions;
-};
-} // namespace
-
-AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
-                                   const gpu::TargetOptions &targetOptions)
-    : SerializeGPUModuleBase(module, target, targetOptions),
-      targetOptions(targetOptions) {}
-
-gpu::GPUModuleOp AMDGPUSerializer::getOperation() {
-  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
+  return std::move(result);
 }
 
 std::optional<SmallVector<char, 0>>
-AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
+SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
   // Assemble the ISA.
   std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA);
 
   if (!isaBinary) {
-    getOperation().emitError() << "Failed during ISA assembling.";
+    getOperation().emitError() << "failed during ISA assembling";
     return std::nullopt;
   }
 
@@ -363,7 +359,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
   if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd,
                                          tempIsaBinaryFilename)) {
     getOperation().emitError()
-        << "Failed to create a temporary file for dumping the ISA binary.";
+        << "failed to create a temporary file for dumping the ISA binary";
     return std::nullopt;
   }
   llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
@@ -378,7 +374,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
   if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco",
                                          tempHsacoFilename)) {
     getOperation().emitError()
-        << "Failed to create a temporary file for the HSA code object.";
+        << "failed to create a temporary file for the HSA code object";
     return std::nullopt;
   }
   llvm::FileRemover cleanupHsaco(tempHsacoFilename);
@@ -389,7 +385,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
       lldPath,
       {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename});
   if (lldResult != 0) {
-    getOperation().emitError() << "lld invocation failed.";
+    getOperation().emitError() << "lld invocation failed";
     return std::nullopt;
   }
 
@@ -398,7 +394,7 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
       llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false);
   if (!hsacoFile) {
     getOperation().emitError()
-        << "Failed to read the HSA code object from the temp file.";
+        << "failed to read the HSA code object from the temp file";
     return std::nullopt;
   }
 
@@ -407,13 +403,13 @@ AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
   return SmallVector<char, 0>(buffer.begin(), buffer.end());
 }
 
-std::optional<SmallVector<char, 0>>
-AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
+std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
+    const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) {
   // Return LLVM IR if the compilation target is offload.
 #define DEBUG_TYPE "serialize-to-llvm"
   LLVM_DEBUG({
-    llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
-                 << "\n"
+    llvm::dbgs() << "LLVM IR for module: "
+                 << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
                  << llvmModule << "\n";
   });
 #undef DEBUG_TYPE
@@ -423,8 +419,8 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
   std::optional<llvm::TargetMachine *> targetMachine =
       getOrCreateTargetMachine();
   if (!targetMachine) {
-    getOperation().emitError() << "Target Machine unavailable for triple "
-                               << triple << ", can't compile with LLVM\n";
+    getOperation().emitError() << "target Machine unavailable for triple "
+                               << triple << ", can't compile with LLVM";
     return std::nullopt;
   }
 
@@ -432,12 +428,13 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
   std::optional<std::string> serializedISA =
       translateToISA(llvmModule, **targetMachine);
   if (!serializedISA) {
-    getOperation().emitError() << "Failed translating the module to ISA.";
+    getOperation().emitError() << "failed translating the module to ISA";
     return std::nullopt;
   }
 #define DEBUG_TYPE "serialize-to-isa"
   LLVM_DEBUG({
-    llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n"
+    llvm::dbgs() << "ISA for module: "
+                 << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
                  << *serializedISA << "\n";
   });
 #undef DEBUG_TYPE
@@ -445,9 +442,41 @@ AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
   if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
     return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());
 
+  // Compiling to binary requires a valid ROCm path, fail if it's not found.
+  if (getToolkitPath().empty()) {
+    getOperation().emitError() << "invalid ROCm path, please set a valid path";
+    return std::nullopt;
+  }
+
   // Compile to binary.
   return compileToBinary(*serializedISA);
 }
+
+#if MLIR_ENABLE_ROCM_CONVERSIONS
+namespace {
+class AMDGPUSerializer : public SerializeGPUModuleBase {
+public:
+  AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
+                   const gpu::TargetOptions &targetOptions);
+
+  std::optional<SmallVector<char, 0>>
+  moduleToObject(llvm::Module &llvmModule) override;
+
+private:
+  // Target options.
+  gpu::TargetOptions targetOptions;
+};
+} // namespace
+
+AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
+                                   const gpu::TargetOptions &targetOptions)
+    : SerializeGPUModuleBase(module, target, targetOptions),
+      targetOptions(targetOptions) {}
+
+std::optional<SmallVector<char, 0>>
+AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
+  return moduleToObjectImpl(targetOptions, llvmModule);
+}
 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
 
 std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
@@ -457,7 +486,7 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
   if (!module)
     return std::nullopt;
   if (!mlir::isa<gpu::GPUModuleOp>(module)) {
-    module->emitError("Module must be a GPU module.");
+    module->emitError("module must be a GPU module");
     return std::nullopt;
   }
 #if MLIR_ENABLE_ROCM_CONVERSIONS
@@ -466,8 +495,8 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
   serializer.init();
   return serializer.run();
 #else
-  module->emitError("The `AMDGPU` target was not built. Please enable it when "
-                    "building LLVM.");
+  module->emitError("the `AMDGPU` target was not built. Please enable it when "
+                    "building LLVM");
   return std::nullopt;
 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
 }
@@ -477,10 +506,15 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute,
                                   const SmallVector<char, 0> &object,
                                   const gpu::TargetOptions &options) const {
   gpu::CompilationTarget format = options.getCompilationTarget();
+  // If format is `fatbin` transform it to binary as `fatbin` is not yet
+  // supported.
+  if (format > gpu::CompilationTarget::Binary)
+    format = gpu::CompilationTarget::Binary;
+
+  DictionaryAttr properties{};
   Builder builder(attribute.getContext());
   return builder.getAttr<gpu::ObjectAttr>(
-      attribute,
-      format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary
-                                              : format,
-      builder.getStringAttr(StringRef(object.data(), object.size())), nullptr);
+      attribute, format,
+      builder.getStringAttr(StringRef(object.data(), object.size())),
+      properties);
 }