[Mlir-commits] [mlir] [mlir][gpu] Remove old GPU serialization passes (PR #94998)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Jun 10 16:51:56 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Fabian Mora (fabianmcg)
<details>
<summary>Changes</summary>
This patch removes the last vestiges of the old GPU serialization pipeline. To compile GPU code, use target attributes instead.
---
Patch is 30.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94998.diff
6 Files Affected:
- (modified) mlir/include/mlir/Conversion/Passes.td (+1-1)
- (modified) mlir/include/mlir/Dialect/GPU/Transforms/Passes.h (-63)
- (modified) mlir/include/mlir/Dialect/GPU/Transforms/Utils.h (-3)
- (modified) mlir/lib/Dialect/GPU/CMakeLists.txt (-43)
- (removed) mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp (-153)
- (removed) mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp (-458)
``````````diff
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index eb58f4adc31d3..f93b2a3cabde7 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -480,7 +480,7 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> {
"The kernel must use the same setting for this option."
>,
Option<"gpuBinaryAnnotation", "gpu-binary-annotation", "std::string",
- /*default=*/"gpu::getDefaultGpuBinaryAnnotation()",
+ /*default=*/"",
"Annotation attribute string for GPU binary"
>
];
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 8f7466a697d85..a20bae86ace28 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -91,75 +91,12 @@ namespace gpu {
LogicalResult transformGpuModulesToBinaries(
Operation *op, OffloadingLLVMTranslationAttrInterface handler = nullptr,
const gpu::TargetOptions &options = {});
-
-/// Base pass class to serialize kernel functions through LLVM into
-/// user-specified IR and add the resulting blob as module attribute.
-class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
-public:
- SerializeToBlobPass(TypeID passID);
- SerializeToBlobPass(const SerializeToBlobPass &other);
-
- void runOnOperation() final;
-
-protected:
- /// Hook allowing the application of optimizations before codegen
- /// By default, does nothing
- virtual LogicalResult optimizeLlvm(llvm::Module &llvmModule,
- llvm::TargetMachine &targetMachine);
-
- /// Translates the 'getOperation()' result to an LLVM module.
- virtual std::unique_ptr<llvm::Module>
- translateToLLVMIR(llvm::LLVMContext &llvmContext);
-
-private:
- /// Creates the LLVM target machine to generate the ISA.
- std::unique_ptr<llvm::TargetMachine> createTargetMachine();
-
- /// Translates the module to ISA
- std::optional<std::string> translateToISA(llvm::Module &llvmModule,
- llvm::TargetMachine &targetMachine);
-
- /// Serializes the target ISA to binary form.
- virtual std::unique_ptr<std::vector<char>>
- serializeISA(const std::string &isa) = 0;
-
-protected:
- Option<std::string> triple{*this, "triple",
- ::llvm::cl::desc("Target triple")};
- Option<std::string> chip{*this, "chip",
- ::llvm::cl::desc("Target architecture")};
- Option<std::string> features{*this, "features",
- ::llvm::cl::desc("Target features")};
- Option<int> optLevel{*this, "opt-level",
- llvm::cl::desc("Optimization level for compilation"),
- llvm::cl::init(2)};
- Option<std::string> gpuBinaryAnnotation{
- *this, "gpu-binary-annotation",
- llvm::cl::desc("Annotation attribute string for GPU binary"),
- llvm::cl::init(getDefaultGpuBinaryAnnotation())};
- Option<bool> dumpPtx{*this, "dump-ptx",
- ::llvm::cl::desc("Dump generated PTX"),
- llvm::cl::init(false)};
-};
} // namespace gpu
//===----------------------------------------------------------------------===//
// Registration
//===----------------------------------------------------------------------===//
-/// Register pass to serialize GPU kernel functions to a HSAco binary
-/// annotation.
-LLVM_DEPRECATED("use Target attributes instead", "")
-void registerGpuSerializeToHsacoPass();
-
-/// Create an instance of the GPU kernel function to HSAco binary serialization
-/// pass.
-LLVM_DEPRECATED("use Target attributes instead", "")
-std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple,
- StringRef arch,
- StringRef features,
- int optLevel);
-
/// Collect a set of patterns to decompose memrefs ops.
void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns);
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h b/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h
index f25c506fd638d..f8c018ef40bba 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h
@@ -28,9 +28,6 @@ namespace gpu {
class GPUFuncOp;
class LaunchOp;
-/// Returns the default annotation name for GPU binary blobs.
-std::string getDefaultGpuBinaryAnnotation();
-
/// Returns the matching vector combining kind.
vector::CombiningKind convertReductionKind(gpu::AllReduceOperation mode);
} // namespace gpu
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 61ab298ebfb98..1934744c47fc9 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -1,17 +1,3 @@
-if (MLIR_ENABLE_ROCM_CONVERSIONS)
- set(AMDGPU_LIBS
- IRReader
- IPO
- linker
- MCParser
- AMDGPUAsmParser
- AMDGPUCodeGen
- AMDGPUDesc
- AMDGPUInfo
- target
- )
-endif()
-
add_mlir_dialect_library(MLIRGPUDialect
IR/GPUDialect.cpp
IR/InferIntRangeInterfaceImpls.cpp
@@ -51,8 +37,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
Transforms/NVVMAttachTarget.cpp
Transforms/ParallelLoopMapper.cpp
Transforms/ROCDLAttachTarget.cpp
- Transforms/SerializeToBlob.cpp
- Transforms/SerializeToHsaco.cpp
Transforms/ShuffleRewriter.cpp
Transforms/SPIRVAttachTarget.cpp
Transforms/SubgroupReduceLowering.cpp
@@ -61,12 +45,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
- LINK_COMPONENTS
- Core
- MC
- Target
- ${AMDGPU_LIBS}
-
DEPENDS
MLIRGPUPassIncGen
MLIRParallelLoopMapperEnumsGen
@@ -76,15 +54,12 @@ add_mlir_dialect_library(MLIRGPUTransforms
MLIRArithDialect
MLIRAsyncDialect
MLIRBufferizationDialect
- MLIRBuiltinToLLVMIRTranslation
MLIRDataLayoutInterfaces
MLIRExecutionEngineUtils
MLIRGPUDialect
MLIRIR
MLIRIndexDialect
MLIRLLVMDialect
- MLIRGPUToLLVMIRTranslation
- MLIRLLVMToLLVMIRTranslation
MLIRMemRefDialect
MLIRNVVMTarget
MLIRPass
@@ -99,21 +74,3 @@ add_mlir_dialect_library(MLIRGPUTransforms
add_subdirectory(TransformOps)
add_subdirectory(Pipelines)
-
-if(MLIR_ENABLE_ROCM_CONVERSIONS)
- if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
- message(SEND_ERROR
- "Building mlir with ROCm support requires the AMDGPU backend")
- endif()
-
- set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs")
- target_compile_definitions(obj.MLIRGPUTransforms
- PRIVATE
- __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}"
- )
-
- target_link_libraries(MLIRGPUTransforms
- PRIVATE
- MLIRROCDLToLLVMIRTranslation
- )
-endif()
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
deleted file mode 100644
index 1fdfe972a8b59..0000000000000
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a base class for a pass to serialize a gpu module
-// into a binary blob that can be executed on a GPU. The binary blob is added
-// as a string attribute to the gpu module.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/IR/GPUDialect.h"
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/ExecutionEngine/OptUtils.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Target/TargetMachine.h"
-
-#include <optional>
-#include <string>
-
-#define DEBUG_TYPE "serialize-to-blob"
-
-using namespace mlir;
-
-std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }
-
-gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID)
- : OperationPass<gpu::GPUModuleOp>(passID) {}
-
-gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other)
- : OperationPass<gpu::GPUModuleOp>(other) {}
-
-std::optional<std::string>
-gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule,
- llvm::TargetMachine &targetMachine) {
- llvmModule.setDataLayout(targetMachine.createDataLayout());
-
- if (failed(optimizeLlvm(llvmModule, targetMachine)))
- return std::nullopt;
-
- std::string targetISA;
- llvm::raw_string_ostream stream(targetISA);
-
- { // Drop pstream after this to prevent the ISA from being stuck buffering
- llvm::buffer_ostream pstream(stream);
- llvm::legacy::PassManager codegenPasses;
-
- if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
- llvm::CodeGenFileType::AssemblyFile))
- return std::nullopt;
-
- codegenPasses.run(llvmModule);
- }
- return stream.str();
-}
-
-void gpu::SerializeToBlobPass::runOnOperation() {
- // Lower the module to an LLVM IR module using a separate context to enable
- // multi-threaded processing.
- llvm::LLVMContext llvmContext;
- std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext);
- if (!llvmModule)
- return signalPassFailure();
-
- // Lower the LLVM IR module to target ISA.
- std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine();
- if (!targetMachine)
- return signalPassFailure();
-
- std::optional<std::string> maybeTargetISA =
- translateToISA(*llvmModule, *targetMachine);
-
- if (!maybeTargetISA.has_value())
- return signalPassFailure();
-
- std::string targetISA = std::move(*maybeTargetISA);
-
- LLVM_DEBUG({
- llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n";
- llvm::dbgs() << targetISA << "\n";
- llvm::dbgs().flush();
- });
-
- // Serialize the target ISA.
- std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA);
- if (!blob)
- return signalPassFailure();
-
- // Add the blob as module attribute.
- auto attr =
- StringAttr::get(&getContext(), StringRef(blob->data(), blob->size()));
- getOperation()->setAttr(gpuBinaryAnnotation, attr);
-}
-
-LogicalResult
-gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule,
- llvm::TargetMachine &targetMachine) {
- int optLevel = this->optLevel.getValue();
- if (optLevel < 0 || optLevel > 3)
- return getOperation().emitError()
- << "invalid optimization level " << optLevel;
-
- targetMachine.setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel));
-
- auto transformer =
- makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine);
- auto error = transformer(&llvmModule);
- if (error) {
- InFlightDiagnostic mlirError = getOperation()->emitError();
- llvm::handleAllErrors(
- std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) {
- mlirError << "could not optimize LLVM IR: " << ei.message();
- });
- return mlirError;
- }
- return success();
-}
-
-std::unique_ptr<llvm::TargetMachine>
-gpu::SerializeToBlobPass::createTargetMachine() {
- Location loc = getOperation().getLoc();
- std::string error;
- const llvm::Target *target =
- llvm::TargetRegistry::lookupTarget(triple, error);
- if (!target) {
- emitError(loc, Twine("failed to lookup target: ") + error);
- return {};
- }
- llvm::TargetMachine *machine =
- target->createTargetMachine(triple, chip, features, {}, {});
- if (!machine) {
- emitError(loc, "failed to create target machine");
- return {};
- }
-
- return std::unique_ptr<llvm::TargetMachine>{machine};
-}
-
-std::unique_ptr<llvm::Module>
-gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
- return translateModuleToLLVMIR(getOperation(), llvmContext,
- "LLVMDialectModule");
-}
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
deleted file mode 100644
index a4f19981eec38..0000000000000
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
+++ /dev/null
@@ -1,458 +0,0 @@
-//===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that serializes a gpu module into HSAco blob and
-// adds that blob as a string attribute of the module.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Config/mlir-config.h"
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/IR/Location.h"
-#include "mlir/IR/MLIRContext.h"
-
-#if MLIR_ENABLE_ROCM_CONVERSIONS
-#include "mlir/ExecutionEngine/OptUtils.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Support/FileUtilities.h"
-#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IRReader/IRReader.h"
-#include "llvm/Linker/Linker.h"
-
-#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/TargetRegistry.h"
-
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/Program.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/Threading.h"
-#include "llvm/Support/WithColor.h"
-
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-
-#include "llvm/Transforms/IPO/Internalize.h"
-
-#include <optional>
-
-using namespace mlir;
-
-namespace {
-class SerializeToHsacoPass
- : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> {
- static llvm::once_flag initializeBackendOnce;
-
-public:
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToHsacoPass)
-
- SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features,
- int optLevel);
- SerializeToHsacoPass(const SerializeToHsacoPass &other);
- StringRef getArgument() const override { return "gpu-to-hsaco"; }
- StringRef getDescription() const override {
- return "Lower GPU kernel function to HSACO binary annotations";
- }
-
-protected:
- Option<std::string> rocmPath{*this, "rocm-path",
- llvm::cl::desc("Path to ROCm install")};
-
- // Overload to allow linking in device libs
- std::unique_ptr<llvm::Module>
- translateToLLVMIR(llvm::LLVMContext &llvmContext) override;
-
-private:
- // Loads LLVM bitcode libraries
- std::optional<SmallVector<std::unique_ptr<llvm::Module>, 3>>
- loadLibraries(SmallVectorImpl<char> &path,
- SmallVectorImpl<StringRef> &libraries,
- llvm::LLVMContext &context);
-
- // Serializes ROCDL to HSACO.
- std::unique_ptr<std::vector<char>>
- serializeISA(const std::string &isa) override;
-
- LogicalResult assembleIsa(const std::string &isa,
- SmallVectorImpl<char> &result);
- std::unique_ptr<std::vector<char>> createHsaco(ArrayRef<char> isaBinary);
-
- std::string getRocmPath();
-};
-} // namespace
-
-SerializeToHsacoPass::SerializeToHsacoPass(const SerializeToHsacoPass &other)
- : PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass>(other) {}
-
-/// Get a user-specified path to ROCm
-// Tries, in order, the --rocm-path option, the ROCM_PATH environment variable
-// and a compile-time default
-std::string SerializeToHsacoPass::getRocmPath() {
- if (rocmPath.getNumOccurrences() > 0)
- return rocmPath.getValue();
-
- return __DEFAULT_ROCM_PATH__;
-}
-
-// Sets the 'option' to 'value' unless it already has a value.
-static void maybeSetOption(Pass::Option<std::string> &option,
- function_ref<std::string()> getValue) {
- if (!option.hasValue())
- option = getValue();
-}
-
-llvm::once_flag SerializeToHsacoPass::initializeBackendOnce;
-
-SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch,
- StringRef features, int optLevel) {
- // No matter how this pass is constructed, ensure that the AMDGPU backend
- // is initialized exactly once.
- llvm::call_once(initializeBackendOnce, []() {
- // Initialize LLVM AMDGPU backend.
- LLVMInitializeAMDGPUAsmParser();
- LLVMInitializeAMDGPUAsmPrinter();
- LLVMInitializeAMDGPUTarget();
- LLVMInitializeAMDGPUTargetInfo();
- LLVMInitializeAMDGPUTargetMC();
- });
- maybeSetOption(this->triple, [&triple] { return triple.str(); });
- maybeSetOption(this->chip, [&arch] { return arch.str(); });
- maybeSetOption(this->features, [&features] { return features.str(); });
- if (this->optLevel.getNumOccurrences() == 0)
- this->optLevel.setValue(optLevel);
-}
-
-std::optional<SmallVector<std::unique_ptr<llvm::Module>, 3>>
-SerializeToHsacoPass::loadLibraries(SmallVectorImpl<char> &path,
- SmallVectorImpl<StringRef> &libraries,
- llvm::LLVMContext &context) {
- SmallVector<std::unique_ptr<llvm::Module>, 3> ret;
- size_t dirLength = path.size();
-
- if (!llvm::sys::fs::is_directory(path)) {
- getOperation().emitRemark() << "Bitcode path: " << path
- << " does not exist or is not a directory\n";
- return std::nullopt;
- }
-
- for (const StringRef file : libraries) {
- llvm::SMDiagnostic error;
- llvm::sys::path::append(path, file);
- llvm::StringRef pathRef(path.data(), path.size());
- std::unique_ptr<llvm::Module> library =
- llvm::getLazyIRFileModule(pathRef, error, context);
- path.truncate(dirLength);
- if (!library) {
- getOperation().emitError() << "Failed to load library " << file
- << " from " << path << error.getMessage();
- return std::nullopt;
- }
- // Some ROCM builds don't strip this like they should
- if (auto *openclVersion = library->getNamedMetadata("opencl.ocl.version"))
- library->eraseNamedMetadata(openclVersion);
- // Stop spamming us with clang version numbers
- if (auto *ident = library->getNamedMetadata("llvm.ident"))
- library->eraseNamedMetadata(ident);
- ret.push_back(std::move(library));
- }
-
- return std::move(ret);
-}
-
-std::unique_ptr<llvm::Module>
-SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
- // MLIR -> LLVM translation
- std::unique_ptr<llvm::Module> ret =
- gpu::SerializeToBlobPass::translateToLLVMIR(llvmContext);
-
- if (!ret) {
- getOperation().emitOpError("Module lowering failed");
- return ret;
- }
- // Walk the LLVM module in order to determine if we need to link in device
- // libs
- bool needOpenCl = false;
- bool needOckl = false;
- bool needOcml = false;
- for (llvm::Function &f : ret->functions()) {
- if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) {
- StringRef fu...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/94998
More information about the Mlir-commits mailing list