[Mlir-commits] [mlir] 4d295cf - [mlir] Add base class for GpuKernelToBlobPass
Christian Sigg
llvmlistbot at llvm.org
Wed Mar 10 03:14:55 PST 2021
Author: Christian Sigg
Date: 2021-03-10T12:14:43+01:00
New Revision: 4d295cf5b54e03133e5282843a76c247b676e478
URL: https://github.com/llvm/llvm-project/commit/4d295cf5b54e03133e5282843a76c247b676e478
DIFF: https://github.com/llvm/llvm-project/commit/4d295cf5b54e03133e5282843a76c247b676e478.diff
LOG: [mlir] Add base class for GpuKernelToBlobPass
Instead of configuring kernel-to-cubin/rocdl lowering through callbacks, introduce a base class that target-specific passes can derive from.
Put the base class in GPU/Transforms, according to the discussion in D98203.
The mlir-cuda-runner will go away shortly, and the mlir-rocdl-runner as well at some point. I therefore kept the existing code path working and will remove it in a separate step.
Depends On D98168
Reviewed By: herhut
Differential Revision: https://reviews.llvm.org/D98279
Added:
mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
Modified:
mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
mlir/include/mlir/Dialect/GPU/Passes.h
mlir/lib/Conversion/GPUCommon/CMakeLists.txt
mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
mlir/lib/Dialect/GPU/CMakeLists.txt
Removed:
################################################################################
diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
index 4ef1d8a811c3..fb5e8202df63 100644
--- a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
+++ b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
@@ -9,9 +9,14 @@
#define MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_
#include "mlir/Support/LLVM.h"
-#include "llvm/IR/Module.h"
+#include "llvm/ADT/StringRef.h"
#include <vector>
+namespace llvm {
+class LLVMContext;
+class Module;
+} // namespace llvm
+
namespace mlir {
class LLVMTypeConverter;
@@ -26,9 +31,6 @@ class OperationPass;
namespace gpu {
class GPUModuleOp;
-
-/// Returns the default annotation name for GPU binary blobs.
-std::string getDefaultGpuBinaryAnnotation();
} // namespace gpu
namespace LLVM {
diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h
index c3a40a044bcd..c280026e6de9 100644
--- a/mlir/include/mlir/Dialect/GPU/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Passes.h
@@ -13,8 +13,15 @@
#ifndef MLIR_DIALECT_GPU_PASSES_H_
#define MLIR_DIALECT_GPU_PASSES_H_
+#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Pass/Pass.h"
+namespace llvm {
+class TargetMachine;
+class LLVMContext;
+class Module;
+} // namespace llvm
+
namespace mlir {
/// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into
/// a separate kernel function.
@@ -33,6 +40,45 @@ inline void populateGpuRewritePatterns(MLIRContext *context,
populateGpuAllReducePatterns(context, patterns);
}
+namespace gpu {
+/// Returns the default annotation name for GPU binary blobs. It is used as
+/// the initial value of the `gpu-binary-annotation` pass option below.
+std::string getDefaultGpuBinaryAnnotation();
+
+/// Base pass class to serialize kernel functions through LLVM into
+/// user-specified IR and add the resulting blob as module attribute.
+///
+/// Derived passes provide the two target-specific steps, `translateToLLVMIR`
+/// and `serializeISA`. `runOnOperation` is final and drives the sequence:
+/// translate to LLVM IR, create the target machine from the options below,
+/// emit the target ISA, serialize it, and attach the blob to the gpu module.
+class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
+public:
+ /// Forwards `passID` to the `OperationPass` base class.
+ SerializeToBlobPass(TypeID passID);
+ /// Copies the base pass state and the values of all options from `other`.
+ SerializeToBlobPass(const SerializeToBlobPass &other);
+
+ /// Runs the serialization sequence on the current gpu module and signals
+ /// pass failure if any step does not produce a result.
+ void runOnOperation() final;
+
+private:
+ // Creates the LLVM target machine to generate the ISA, configured from the
+ // `triple`, `chip` and `features` options. Returns null after emitting an
+ // error if the target cannot be looked up or the machine cannot be created.
+ std::unique_ptr<llvm::TargetMachine> createTargetMachine();
+
+ // Translates the 'getOperation()' result to an LLVM module. A null result
+ // makes the pass fail.
+ virtual std::unique_ptr<llvm::Module>
+ translateToLLVMIR(llvm::LLVMContext &llvmContext) = 0;
+
+ // Serializes the target ISA to binary form. A null result makes the pass
+ // fail.
+ virtual std::unique_ptr<std::vector<char>>
+ serializeISA(const std::string &isa) = 0;
+
+protected:
+ // Pass options, accessible to derived passes so that they can set them
+ // programmatically.
+ Option<std::string> triple{*this, "triple",
+ ::llvm::cl::desc("Target triple")};
+ Option<std::string> chip{*this, "chip",
+ ::llvm::cl::desc("Target architecture")};
+ Option<std::string> features{*this, "features",
+ ::llvm::cl::desc("Target features")};
+ // Name of the module attribute that receives the serialized blob.
+ Option<std::string> gpuBinaryAnnotation{
+ *this, "gpu-binary-annotation",
+ llvm::cl::desc("Annotation attribute string for GPU binary"),
+ llvm::cl::init(getDefaultGpuBinaryAnnotation())};
+};
+} // namespace gpu
+
//===----------------------------------------------------------------------===//
// Registration
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
index 825bed600aba..53da5e00233a 100644
--- a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
@@ -24,8 +24,6 @@ add_mlir_conversion_library(MLIRGPUToGPURuntimeTransforms
intrinsics_gen
LINK_COMPONENTS
- Core
- MC
${AMDGPU_LIBS}
${NVPTX_LIBS}
diff --git a/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp b/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
index 9b5fc8f721da..2f57524d8425 100644
--- a/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
@@ -15,6 +15,7 @@
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
+#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
@@ -25,14 +26,10 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Module.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
-#include "llvm/Target/TargetMachine.h"
using namespace mlir;
@@ -45,126 +42,43 @@ namespace {
/// GPU binary code, which is then attached as an attribute to the function.
/// The function body is erased.
class GpuKernelToBlobPass
- : public PassWrapper<GpuKernelToBlobPass, OperationPass<gpu::GPUModuleOp>> {
+ : public PassWrapper<GpuKernelToBlobPass, gpu::SerializeToBlobPass> {
public:
GpuKernelToBlobPass(LoweringCallback loweringCallback,
BlobGenerator blobGenerator, StringRef triple,
StringRef targetChip, StringRef features,
StringRef gpuBinaryAnnotation)
- : loweringCallback(loweringCallback), blobGenerator(blobGenerator),
- triple(triple), targetChip(targetChip), features(features) {
+ : loweringCallback(loweringCallback), blobGenerator(blobGenerator) {
+ if (!triple.empty())
+ this->triple = triple.str();
+ if (!targetChip.empty())
+ this->chip = targetChip.str();
+ if (!features.empty())
+ this->features = features.str();
if (!gpuBinaryAnnotation.empty())
this->gpuBinaryAnnotation = gpuBinaryAnnotation.str();
}
- GpuKernelToBlobPass(const GpuKernelToBlobPass &other)
- : loweringCallback(other.loweringCallback),
- blobGenerator(other.blobGenerator), triple(other.triple),
- targetChip(other.targetChip), features(other.features) {}
-
- void runOnOperation() override {
- gpu::GPUModuleOp module = getOperation();
-
- // Lower the module to an LLVM IR module using a separate context to enable
- // multi-threaded processing.
- llvm::LLVMContext llvmContext;
- std::unique_ptr<llvm::Module> llvmModule =
- loweringCallback(module, llvmContext, "LLVMDialectModule");
- if (!llvmModule)
- return signalPassFailure();
-
- // Translate the llvm module to a target blob and attach the result as
- // attribute to the module.
- if (auto blobAttr = translateGPUModuleToBinaryAnnotation(
- *llvmModule, module.getLoc(), module.getName()))
- module->setAttr(gpuBinaryAnnotation, blobAttr);
- else
- signalPassFailure();
- }
-
private:
- std::string translateModuleToISA(llvm::Module &module,
- llvm::TargetMachine &targetMachine);
-
- /// Converts llvmModule to a blob with target instructions using the
- /// user-provided generator. Location is used for error reporting and name is
- /// forwarded to the blob generator to use in its logging mechanisms.
- OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc,
- StringRef name);
+ // Translates the 'getOperation()' result to an LLVM module.
+ std::unique_ptr<llvm::Module>
+ translateToLLVMIR(llvm::LLVMContext &llvmContext) override {
+ return loweringCallback(getOperation(), llvmContext, "LLVMDialectModule");
+ }
- /// Translates llvmModule to a blob with target instructions and returns the
- /// result as attribute.
- StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule,
- Location loc, StringRef name);
+ // Serializes the target ISA to binary form.
+ std::unique_ptr<std::vector<char>>
+ serializeISA(const std::string &isa) override {
+ return blobGenerator(isa, getOperation().getLoc(),
+ getOperation().getName());
+ }
LoweringCallback loweringCallback;
BlobGenerator blobGenerator;
-
- llvm::Triple triple;
- std::string targetChip;
- std::string features;
-
- Option<std::string> gpuBinaryAnnotation{
- *this, "gpu-binary-annotation",
- llvm::cl::desc("Annotation attribute string for GPU binary"),
- llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())};
};
} // anonymous namespace
-std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }
-
-std::string
-GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module,
- llvm::TargetMachine &targetMachine) {
- std::string targetISA;
- {
- llvm::raw_string_ostream stream(targetISA);
- llvm::buffer_ostream pstream(stream);
- llvm::legacy::PassManager codegenPasses;
- targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
- llvm::CGFT_AssemblyFile);
- codegenPasses.run(module);
- }
-
- return targetISA;
-}
-
-OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule,
- Location loc,
- StringRef name) {
- std::unique_ptr<llvm::TargetMachine> targetMachine;
- {
- std::string error;
- const llvm::Target *target =
- llvm::TargetRegistry::lookupTarget("", triple, error);
- if (target == nullptr) {
- emitError(loc, "cannot initialize target triple");
- return {};
- }
- targetMachine.reset(target->createTargetMachine(triple.str(), targetChip,
- features, {}, {}));
- if (targetMachine == nullptr) {
- emitError(loc, "cannot initialize target machine");
- return {};
- }
- }
-
- llvmModule.setDataLayout(targetMachine->createDataLayout());
-
- auto targetISA = translateModuleToISA(llvmModule, *targetMachine);
-
- return blobGenerator(targetISA, loc, name);
-}
-
-StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation(
- llvm::Module &llvmModule, Location loc, StringRef name) {
- auto blob = convertModuleToBlob(llvmModule, loc, name);
- if (!blob)
- return {};
- return StringAttr::get(loc->getContext(), {blob->data(), blob->size()});
-}
-
std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,
BlobGenerator blobGenerator,
diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
index 3a6cffd34268..44dfd730e44f 100644
--- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
@@ -20,6 +20,7 @@
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Dialect/Async/IR/Async.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
+#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
@@ -27,10 +28,6 @@
#include "mlir/IR/BuiltinTypes.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 39d387964441..ed0113800623 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -5,10 +5,15 @@ add_mlir_dialect_library(MLIRGPU
Transforms/KernelOutlining.cpp
Transforms/MemoryPromotion.cpp
Transforms/ParallelLoopMapper.cpp
+ Transforms/SerializeToBlob.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
+ LINK_COMPONENTS
+ Core
+ MC
+
DEPENDS
MLIRGPUOpsIncGen
MLIRGPUOpInterfacesIncGen
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
new file mode 100644
index 000000000000..b6e3c7082024
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
@@ -0,0 +1,95 @@
+//===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a base class for a pass to serialize a gpu module
+// into a binary blob that can be executed on a GPU. The binary blob is added
+// as a string attribute to the gpu module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/Passes.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace mlir;
+
+/// Returns the attribute name under which a serialized GPU binary is stored
+/// when the `gpu-binary-annotation` option is not overridden.
+std::string gpu::getDefaultGpuBinaryAnnotation() {
+  const char *const defaultAnnotation = "gpu.binary";
+  return defaultAnnotation;
+}
+
+// Forwards the TypeID of the concrete pass to the OperationPass base class.
+// The options are left at the initial values declared in the class.
+gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID)
+ : OperationPass<gpu::GPUModuleOp>(passID) {}
+
+// Copy constructor. The base-class state is copied by the OperationPass copy
+// constructor; the option values are then assigned one by one from `other`.
+gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other)
+ : OperationPass<gpu::GPUModuleOp>(other) {
+ // Pass::Option has no copy constructor, so each option is copied manually.
+ triple = other.triple;
+ chip = other.chip;
+ features = other.features;
+ gpuBinaryAnnotation = other.gpuBinaryAnnotation;
+}
+
+/// Lowers `llvmModule` to the textual assembly of the target described by
+/// `targetMachine` and returns that assembly as a string.
+///
+/// The module's data layout is overwritten with the one created by the target
+/// machine before code generation runs.
+static std::string translateToISA(llvm::Module &llvmModule,
+                                  llvm::TargetMachine &targetMachine) {
+  llvmModule.setDataLayout(targetMachine.createDataLayout());
+
+  std::string targetISA;
+  llvm::raw_string_ostream stream(targetISA);
+  {
+    // The buffered stream must be destroyed before the result is read, so
+    // that everything the code generator wrote has reached `stream`. Without
+    // this scope the string would be returned while both streams are still
+    // alive, and its contents would depend on NRVO being applied.
+    llvm::buffer_ostream pstream(stream);
+    llvm::legacy::PassManager codegenPasses;
+    // NOTE(review): the result of addPassesToEmitFile is ignored here, as it
+    // was in the previous implementation; confirm whether a failure to set up
+    // the emission pipeline should be reported to the caller.
+    targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
+                                      llvm::CGFT_AssemblyFile);
+    codegenPasses.run(llvmModule);
+  }
+  // str() flushes `stream` into `targetISA` before handing it back.
+  return stream.str();
+}
+
+/// Serializes the current gpu module: MLIR -> LLVM IR -> target ISA -> binary
+/// blob. The blob is stored on the module as a string attribute whose name is
+/// taken from the `gpu-binary-annotation` option. If any stage yields no
+/// result, the pass is marked as failed and the attribute is not set.
+void gpu::SerializeToBlobPass::runOnOperation() {
+  // Each invocation lowers into its own LLVM context, which enables
+  // multi-threaded processing.
+  llvm::LLVMContext context;
+  std::unique_ptr<llvm::Module> module = translateToLLVMIR(context);
+  if (!module) {
+    signalPassFailure();
+    return;
+  }
+
+  // Set up the code generator for the requested target.
+  std::unique_ptr<llvm::TargetMachine> machine = createTargetMachine();
+  if (!machine) {
+    signalPassFailure();
+    return;
+  }
+
+  // Emit the target ISA and hand it to the derived pass for serialization.
+  std::string isa = translateToISA(*module, *machine);
+  std::unique_ptr<std::vector<char>> binary = serializeISA(isa);
+  if (!binary) {
+    signalPassFailure();
+    return;
+  }
+
+  // Attach the serialized bytes to the gpu module.
+  getOperation()->setAttr(
+      gpuBinaryAnnotation,
+      StringAttr::get(&getContext(), {binary->data(), binary->size()}));
+}
+
+/// Builds the LLVM target machine selected by the `triple`, `chip` and
+/// `features` options. On failure an error is emitted at the location of the
+/// gpu module and a null pointer is returned.
+std::unique_ptr<llvm::TargetMachine>
+gpu::SerializeToBlobPass::createTargetMachine() {
+  Location loc = getOperation().getLoc();
+
+  // Resolve the target from the registry; the registry reports the reason
+  // through `lookupError` when the triple is not known.
+  std::string lookupError;
+  const llvm::Target *target =
+      llvm::TargetRegistry::lookupTarget(triple, lookupError);
+  if (target == nullptr) {
+    emitError(loc, Twine("failed to lookup target: ") + lookupError);
+    return nullptr;
+  }
+
+  // Take ownership of the machine right away; it is null if creation failed.
+  std::unique_ptr<llvm::TargetMachine> machine(
+      target->createTargetMachine(triple, chip, features, {}, {}));
+  if (machine == nullptr)
+    emitError(loc, "failed to create target machine");
+  return machine;
+}
More information about the Mlir-commits
mailing list