[llvm-branch-commits] [mlir] 768bda0 - Revert "Reland [mlir][Target] Improve ROCDL gpu serialization API (#95813)"

Mon Jun 17 13:49:55 PDT 2024

Author: Fabian Mora
Date: 2024-06-17T15:49:52-05:00
New Revision: 768bda06368a96d186ed292e15d979f7ee695819

URL: https://github.com/llvm/llvm-project/commit/768bda06368a96d186ed292e15d979f7ee695819
DIFF: https://github.com/llvm/llvm-project/commit/768bda06368a96d186ed292e15d979f7ee695819.diff

LOG: Revert "Reland [mlir][Target] Improve ROCDL gpu serialization API (#95813)"

This reverts commit dcb6c0d71c8dbb6bb17391c968c3716cfafd3765.

Added: 
    

Modified: 
    mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
    mlir/lib/Dialect/GPU/CMakeLists.txt
    mlir/lib/Target/LLVM/CMakeLists.txt
    mlir/lib/Target/LLVM/ROCDL/Target.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
index 44c9ded317fa5..374fa65bd02e3 100644

--- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
@@ -27,19 +27,6 @@ namespace ROCDL {
 /// 5. Returns an empty string.
 StringRef getROCMPath();
 
-/// Helper enum for specifying the AMD GCN device libraries required for
-/// compilation.
-enum class AMDGCNLibraries : uint32_t {
-  None = 0,
-  Ockl = 1,
-  Ocml = 2,
-  OpenCL = 4,
-  Hip = 8,
-  LastLib = Hip,
-  LLVM_MARK_AS_BITMASK_ENUM(LastLib),
-  All = (LastLib << 1) - 1
-};
-
 /// Base class for all ROCDL serializations from GPU modules into binary
 /// strings. By default this class serializes into LLVM bitcode.
 class SerializeGPUModuleBase : public LLVM::ModuleToObject {
@@ -62,8 +49,8 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
   /// Returns the bitcode files to be loaded.
   ArrayRef<std::string> getFileList() const;
 
-  /// Appends standard ROCm device libraries to `fileList`.
-  LogicalResult appendStandardLibs(AMDGCNLibraries libs);
+  /// Appends standard ROCm device libraries like `ocml.bc`, `ockl.bc`, etc.
+  LogicalResult appendStandardLibs();
 
   /// Loads the bitcode files in `fileList`.
   virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
@@ -76,20 +63,15 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
   LogicalResult handleBitcodeFile(llvm::Module &module) override;
 
 protected:
-  /// Adds `oclc` control variables to the LLVM module.
-  void addControlVariables(llvm::Module &module, AMDGCNLibraries libs,
-                           bool wave64, bool daz, bool finiteOnly,
-                           bool unsafeMath, bool fastMath, bool correctSqrt,
-                           StringRef abiVer);
+  /// Appends the paths of common ROCm device libraries to `libs`.
+  LogicalResult getCommonBitcodeLibs(llvm::SmallVector<std::string> &libs,
+                                     SmallVector<char, 256> &libPath,
+                                     StringRef isaVersion);
 
-  /// Compiles assembly to a binary.
-  virtual std::optional<SmallVector<char, 0>>
-  compileToBinary(const std::string &serializedISA);
-
-  /// Default implementation of `ModuleToObject::moduleToObject`.
-  std::optional<SmallVector<char, 0>>
-  moduleToObjectImpl(const gpu::TargetOptions &targetOptions,
-                     llvm::Module &llvmModule);
+  /// Adds `oclc` control variables to the LLVM module.
+  void addControlVariables(llvm::Module &module, bool wave64, bool daz,
+                           bool finiteOnly, bool unsafeMath, bool fastMath,
+                           bool correctSqrt, StringRef abiVer);
 
   /// Returns the assembled ISA.
   std::optional<SmallVector<char, 0>> assembleIsa(StringRef isa);
@@ -102,9 +84,6 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject {
 
   /// List of LLVM bitcode files to link to.
   SmallVector<std::string> fileList;
-
-  /// AMD GCN libraries to use when linking, the default is using none.
-  AMDGCNLibraries deviceLibs = AMDGCNLibraries::None;
 };
 } // namespace ROCDL
 } // namespace mlir

diff  --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 08c8aea36fac9..61ab298ebfb98 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -106,7 +106,7 @@ if(MLIR_ENABLE_ROCM_CONVERSIONS)
       "Building mlir with ROCm support requires the AMDGPU backend")
   endif()
 
-  set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs")
+  set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs")
   target_compile_definitions(obj.MLIRGPUTransforms
     PRIVATE
     __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}"

diff  --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 6e146710d67af..5a3fa160850b4 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -123,18 +123,17 @@ add_mlir_dialect_library(MLIRROCDLTarget
   )
 
 if(MLIR_ENABLE_ROCM_CONVERSIONS)
+  if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
+    message(SEND_ERROR
+      "Building mlir with ROCm support requires the AMDGPU backend")
+  endif()
+
   if (DEFINED ROCM_PATH)
     set(DEFAULT_ROCM_PATH "${ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs")
   elseif(DEFINED ENV{ROCM_PATH})
     set(DEFAULT_ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Fallback path to search for ROCm installs")
   else()
-    IF (WIN32)
-      # Avoid setting an UNIX path for Windows.
-      # TODO: Eventually migrate to FindHIP once it becomes a part of CMake.
-      set(DEFAULT_ROCM_PATH "" CACHE PATH "Fallback path to search for ROCm installs")
-    else()
-      set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs")
-    endif()
+    set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs")
   endif()
   message(VERBOSE "MLIR Default ROCM toolkit path: ${DEFAULT_ROCM_PATH}")
 

diff  --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
index 6784f3668bde3..cc13e5b7436ea 100644
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -17,6 +17,9 @@
 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
 #include "mlir/Support/FileUtilities.h"
 #include "mlir/Target/LLVM/ROCDL/Utils.h"
+#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
 
 #include "llvm/IR/Constants.h"
@@ -109,9 +112,8 @@ SerializeGPUModuleBase::SerializeGPUModuleBase(
       if (auto file = dyn_cast<StringAttr>(attr))
         fileList.push_back(file.str());
 
-  // By default add all libraries if the toolkit path is not empty.
-  if (!getToolkitPath().empty())
-    deviceLibs = AMDGCNLibraries::All;
+  // Append standard ROCm device bitcode libraries to the files to be loaded.
+  (void)appendStandardLibs();
 }
 
 void SerializeGPUModuleBase::init() {
@@ -136,67 +138,29 @@ ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
   return fileList;
 }
 
-LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) {
-  if (libs == AMDGCNLibraries::None)
-    return success();
+LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
   StringRef pathRef = getToolkitPath();
-  // Fail if the toolkit is empty.
-  if (pathRef.empty())
-    return failure();
-
-  // Get the path for the device libraries
-  SmallString<256> path;
-  path.insert(path.begin(), pathRef.begin(), pathRef.end());
-  llvm::sys::path::append(path, "amdgcn", "bitcode");
-  pathRef = StringRef(path.data(), path.size());
-
-  // Fail if the path is invalid.
-  if (!llvm::sys::fs::is_directory(pathRef)) {
-    getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef
-                                << " does not exist or is not a directory";
-    return failure();
-  }
-
-  // Get the ISA version.
-  StringRef isaVersion =
-      llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip));
-  isaVersion.consume_front("gfx");
-
-  // Helper function for adding a library.
-  auto addLib = [&](const Twine &lib) -> bool {
-    auto baseSize = path.size();
-    llvm::sys::path::append(path, lib);
-    StringRef pathRef(path.data(), path.size());
-    if (!llvm::sys::fs::is_regular_file(pathRef)) {
-      getOperation().emitRemark() << "bitcode library path: " << pathRef
-                                  << " does not exist or is not a file";
-      return true;
+  if (!pathRef.empty()) {
+    SmallVector<char, 256> path;
+    path.insert(path.begin(), pathRef.begin(), pathRef.end());
+    llvm::sys::path::append(path, "amdgcn", "bitcode");
+    pathRef = StringRef(path.data(), path.size());
+    if (!llvm::sys::fs::is_directory(pathRef)) {
+      getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef
+                                  << " does not exist or is not a directory.";
+      return failure();
     }
-    fileList.push_back(pathRef.str());
-    path.truncate(baseSize);
-    return false;
-  };
-
-  // Add ROCm device libraries. Fail if any of the libraries is not found, ie.
-  // if any of the `addLib` failed.
-  if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) ||
-      (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) ||
-      (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) ||
-      (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc")) ||
-      (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl)) &&
-       addLib("oclc_isa_version_" + isaVersion + ".bc")))
-    return failure();
+    StringRef isaVersion =
+        llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip));
+    isaVersion.consume_front("gfx");
+    return getCommonBitcodeLibs(fileList, path, isaVersion);
+  }
   return success();
 }
 
 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
   SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
-  // Return if there are no libs to load.
-  if (deviceLibs == AMDGCNLibraries::None && fileList.empty())
-    return bcFiles;
-  if (failed(appendStandardLibs(deviceLibs)))
-    return std::nullopt;
   if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles,
                                       true)))
     return std::nullopt;
@@ -210,76 +174,80 @@ LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) {
   // Stop spamming us with clang version numbers
   if (auto *ident = module.getNamedMetadata("llvm.ident"))
     module.eraseNamedMetadata(ident);
-  // Override the libModules datalayout and target triple with the compiler's
-  // data layout should there be a discrepency.
-  setDataLayoutAndTriple(module);
   return success();
 }
 
 void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) {
-  // If all libraries are not set, traverse the module to determine which
-  // libraries are required.
-  if (deviceLibs != AMDGCNLibraries::All) {
-    for (llvm::Function &f : module.functions()) {
-      if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
-        StringRef funcName = f.getName();
-        if ("printf" == funcName)
-          deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl |
-                        AMDGCNLibraries::Ocml;
-        if (funcName.starts_with("__ockl_"))
-          deviceLibs |= AMDGCNLibraries::Ockl;
-        if (funcName.starts_with("__ocml_"))
-          deviceLibs |= AMDGCNLibraries::Ocml;
-      }
-    }
-  }
-  addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(),
+  [[maybe_unused]] std::optional<llvm::TargetMachine *> targetMachine =
+      getOrCreateTargetMachine();
+  assert(targetMachine && "expect a TargetMachine");
+  addControlVariables(module, target.hasWave64(), target.hasDaz(),
                       target.hasFiniteOnly(), target.hasUnsafeMath(),
                       target.hasFastMath(), target.hasCorrectSqrt(),
                       target.getAbi());
 }
 
-void SerializeGPUModuleBase::addControlVariables(
-    llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz,
-    bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt,
-    StringRef abiVer) {
-  // Return if no device libraries are required.
-  if (libs == AMDGCNLibraries::None)
-    return;
-  // Helper function for adding control variables.
-  auto addControlVariable = [&module](StringRef name, uint32_t value,
-                                      uint32_t bitwidth) {
-    if (module.getNamedGlobal(name)) {
-      return;
+// Get the paths of ROCm device libraries.
+LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs(
+    llvm::SmallVector<std::string> &libs, SmallVector<char, 256> &libPath,
+    StringRef isaVersion) {
+  auto addLib = [&](StringRef path) -> bool {
+    if (!llvm::sys::fs::is_regular_file(path)) {
+      getOperation().emitRemark() << "Bitcode library path: " << path
+                                  << " does not exist or is not a file.\n";
+      return true;
     }
-    llvm::IntegerType *type =
-        llvm::IntegerType::getIntNTy(module.getContext(), bitwidth);
+    libs.push_back(path.str());
+    return false;
+  };
+  auto getLibPath = [&libPath](Twine lib) {
+    auto baseSize = libPath.size();
+    llvm::sys::path::append(libPath, lib + ".bc");
+    std::string path(StringRef(libPath.data(), libPath.size()).str());
+    libPath.truncate(baseSize);
+    return path;
+  };
+
+  // Add ROCm device libraries. Fail if any of the libraries is not found.
+  if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) ||
+      addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) ||
+      addLib(getLibPath("oclc_isa_version_" + isaVersion)))
+    return failure();
+  return success();
+}
+
+void SerializeGPUModuleBase::addControlVariables(
+    llvm::Module &module, bool wave64, bool daz, bool finiteOnly,
+    bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) {
+  llvm::Type *i8Ty = llvm::Type::getInt8Ty(module.getContext());
+  auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) {
     llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable(
-        module, /*isConstant=*/type, true,
-        llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
-        llvm::ConstantInt::get(type, value), name, /*before=*/nullptr,
-        /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
-        /*addressSpace=*/4);
+        module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
+        llvm::ConstantInt::get(i8Ty, enable), name, nullptr,
+        llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
     controlVariable->setVisibility(
         llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
-    controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8));
+    controlVariable->setAlignment(llvm::MaybeAlign(1));
     controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
   };
-  // Add ocml related control variables.
-  if (any(libs & AMDGCNLibraries::Ocml)) {
-    addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8);
-    addControlVariable("__oclc_daz_opt", daz || fastMath, 8);
-    addControlVariable("__oclc_correctly_rounded_sqrt32",
-                       correctSqrt && !fastMath, 8);
-    addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8);
-  }
-  // Add ocml or ockl related control variables.
-  if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) {
-    addControlVariable("__oclc_wavefrontsize64", wave64, 8);
-    int abi = 500;
-    abiVer.getAsInteger(0, abi);
-    addControlVariable("__oclc_ABI_version", abi, 32);
-  }
+  addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath);
+  addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath);
+  addControlVariable("__oclc_daz_opt", daz || fastMath);
+  addControlVariable("__oclc_correctly_rounded_sqrt32",
+                     correctSqrt && !fastMath);
+  addControlVariable("__oclc_wavefrontsize64", wave64);
+
+  llvm::Type *i32Ty = llvm::Type::getInt32Ty(module.getContext());
+  int abi = 500;
+  abiVer.getAsInteger(0, abi);
+  llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable(
+      module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
+      llvm::ConstantInt::get(i32Ty, abi), "__oclc_ABI_version", nullptr,
+      llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
+  abiVersion->setVisibility(
+      llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
+  abiVersion->setAlignment(llvm::MaybeAlign(4));
+  abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
 }
 
 std::optional<SmallVector<char, 0>>
@@ -344,16 +312,48 @@ SerializeGPUModuleBase::assembleIsa(StringRef isa) {
 
   parser->setTargetParser(*tap);
   parser->Run(false);
+
   return result;
 }
 
+#if MLIR_ENABLE_ROCM_CONVERSIONS
+namespace {
+class AMDGPUSerializer : public SerializeGPUModuleBase {
+public:
+  AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
+                   const gpu::TargetOptions &targetOptions);
+
+  gpu::GPUModuleOp getOperation();
+
+  // Compile to HSA.
+  std::optional<SmallVector<char, 0>>
+  compileToBinary(const std::string &serializedISA);
+
+  std::optional<SmallVector<char, 0>>
+  moduleToObject(llvm::Module &llvmModule) override;
+
+private:
+  // Target options.
+  gpu::TargetOptions targetOptions;
+};
+} // namespace
+
+AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
+                                   const gpu::TargetOptions &targetOptions)
+    : SerializeGPUModuleBase(module, target, targetOptions),
+      targetOptions(targetOptions) {}
+
+gpu::GPUModuleOp AMDGPUSerializer::getOperation() {
+  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
+}
+
 std::optional<SmallVector<char, 0>>
-SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
+AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
   // Assemble the ISA.
   std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA);
 
   if (!isaBinary) {
-    getOperation().emitError() << "failed during ISA assembling";
+    getOperation().emitError() << "Failed during ISA assembling.";
     return std::nullopt;
   }
 
@@ -363,7 +363,7 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
   if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd,
                                          tempIsaBinaryFilename)) {
     getOperation().emitError()
-        << "failed to create a temporary file for dumping the ISA binary";
+        << "Failed to create a temporary file for dumping the ISA binary.";
     return std::nullopt;
   }
   llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
@@ -378,7 +378,7 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
   if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco",
                                          tempHsacoFilename)) {
     getOperation().emitError()
-        << "failed to create a temporary file for the HSA code object";
+        << "Failed to create a temporary file for the HSA code object.";
     return std::nullopt;
   }
   llvm::FileRemover cleanupHsaco(tempHsacoFilename);
@@ -389,7 +389,7 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
       lldPath,
       {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename});
   if (lldResult != 0) {
-    getOperation().emitError() << "lld invocation failed";
+    getOperation().emitError() << "lld invocation failed.";
     return std::nullopt;
   }
 
@@ -398,7 +398,7 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
       llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false);
   if (!hsacoFile) {
     getOperation().emitError()
-        << "failed to read the HSA code object from the temp file";
+        << "Failed to read the HSA code object from the temp file.";
     return std::nullopt;
   }
 
@@ -407,13 +407,13 @@ SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) {
   return SmallVector<char, 0>(buffer.begin(), buffer.end());
 }
 
-std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
-    const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) {
+std::optional<SmallVector<char, 0>>
+AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
   // Return LLVM IR if the compilation target is offload.
 #define DEBUG_TYPE "serialize-to-llvm"
   LLVM_DEBUG({
-    llvm::dbgs() << "LLVM IR for module: "
-                 << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
+    llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
+                 << "\n"
                  << llvmModule << "\n";
   });
 #undef DEBUG_TYPE
@@ -423,8 +423,8 @@ std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
   std::optional<llvm::TargetMachine *> targetMachine =
       getOrCreateTargetMachine();
   if (!targetMachine) {
-    getOperation().emitError() << "target Machine unavailable for triple "
-                               << triple << ", can't compile with LLVM";
+    getOperation().emitError() << "Target Machine unavailable for triple "
+                               << triple << ", can't compile with LLVM\n";
     return std::nullopt;
   }
 
@@ -432,13 +432,12 @@ std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
   std::optional<std::string> serializedISA =
       translateToISA(llvmModule, **targetMachine);
   if (!serializedISA) {
-    getOperation().emitError() << "failed translating the module to ISA";
+    getOperation().emitError() << "Failed translating the module to ISA.";
     return std::nullopt;
   }
 #define DEBUG_TYPE "serialize-to-isa"
   LLVM_DEBUG({
-    llvm::dbgs() << "ISA for module: "
-                 << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n"
+    llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n"
                  << *serializedISA << "\n";
   });
 #undef DEBUG_TYPE
@@ -446,45 +445,9 @@ std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
   if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
     return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());
 
-  // Compiling to binary requires a valid ROCm path, fail if it's not found.
-  if (getToolkitPath().empty())
-    getOperation().emitError() << "invalid ROCm path, please set a valid path";
-
   // Compile to binary.
   return compileToBinary(*serializedISA);
 }
-
-#if MLIR_ENABLE_ROCM_CONVERSIONS
-namespace {
-class AMDGPUSerializer : public SerializeGPUModuleBase {
-public:
-  AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
-                   const gpu::TargetOptions &targetOptions);
-
-  gpu::GPUModuleOp getOperation();
-
-  std::optional<SmallVector<char, 0>>
-  moduleToObject(llvm::Module &llvmModule) override;
-
-private:
-  // Target options.
-  gpu::TargetOptions targetOptions;
-};
-} // namespace
-
-AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
-                                   const gpu::TargetOptions &targetOptions)
-    : SerializeGPUModuleBase(module, target, targetOptions),
-      targetOptions(targetOptions) {}
-
-gpu::GPUModuleOp AMDGPUSerializer::getOperation() {
-  return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
-}
-
-std::optional<SmallVector<char, 0>>
-AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
-  return moduleToObjectImpl(targetOptions, llvmModule);
-}
 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
 
 std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
@@ -494,7 +457,7 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
   if (!module)
     return std::nullopt;
   if (!mlir::isa<gpu::GPUModuleOp>(module)) {
-    module->emitError("module must be a GPU module");
+    module->emitError("Module must be a GPU module.");
     return std::nullopt;
   }
 #if MLIR_ENABLE_ROCM_CONVERSIONS
@@ -503,8 +466,8 @@ std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
   serializer.init();
   return serializer.run();
 #else
-  module->emitError("the `AMDGPU` target was not built. Please enable it when "
-                    "building LLVM");
+  module->emitError("The `AMDGPU` target was not built. Please enable it when "
+                    "building LLVM.");
   return std::nullopt;
 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
 }
@@ -514,15 +477,10 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute,
                                   const SmallVector<char, 0> &object,
                                   const gpu::TargetOptions &options) const {
   gpu::CompilationTarget format = options.getCompilationTarget();
-  // If format is `fatbin` transform it to binary as `fatbin` is not yet
-  // supported.
-  if (format > gpu::CompilationTarget::Binary)
-    format = gpu::CompilationTarget::Binary;
-
-  DictionaryAttr properties{};
   Builder builder(attribute.getContext());
   return builder.getAttr<gpu::ObjectAttr>(
-      attribute, format,
-      builder.getStringAttr(StringRef(object.data(), object.size())),
-      properties);
+      attribute,
+      format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary
+                                              : format,
+      builder.getStringAttr(StringRef(object.data(), object.size())), nullptr);
 }