[Mlir-commits] [mlir] 01c755f - Make optimize llvm common to both gpu-to-hsaco/cubin
Uday Bondhugula
llvmlistbot at llvm.org
Sun Jun 4 22:16:15 PDT 2023
Author: Vinayaka Bandishti
Date: 2023-06-05T10:32:51+05:30
New Revision: 01c755ff80cbb795f507cf4317b4a4be1a31484d
URL: https://github.com/llvm/llvm-project/commit/01c755ff80cbb795f507cf4317b4a4be1a31484d
DIFF: https://github.com/llvm/llvm-project/commit/01c755ff80cbb795f507cf4317b4a4be1a31484d.diff
LOG: Make optimize llvm common to both gpu-to-hsaco/cubin
Before serializing, optimizations on the LLVM IR were only run on the
path to hsaco, not on the path to cubin. Define an opt-level option for
the `gpu-to-cubin` pass as well, and move the call that optimizes the
LLVM IR to a common place (SerializeToBlob).
Reviewed By: bondhugula
Differential Revision: https://reviews.llvm.org/D151554
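
As a usage note (not part of this commit), a minimal sketch of how a downstream
pipeline might pass the new optLevel parameter, assuming MLIR was built with the
gpu-to-cubin path enabled; the helper name and include paths are illustrative:

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"

static void addCubinSerialization(mlir::OpPassManager &pm) {
  // Serialize each gpu.module to a cubin blob, running the LLVM IR
  // optimization pipeline at level 3 before PTX generation. Before this
  // change, the opt-level knob existed only on the hsaco path.
  pm.addNestedPass<mlir::gpu::GPUModuleOp>(mlir::createGpuSerializeToCubinPass(
      /*triple=*/"nvptx64-nvidia-cuda", /*chip=*/"sm_70",
      /*features=*/"+ptx60", /*optLevel=*/3));
}

From the mlir-opt command line, the same knob is exposed through the pass option
added above, e.g. --gpu-to-cubin="chip=sm_70 opt-level=3".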
Added:
Modified:
mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
mlir/lib/Dialect/GPU/CMakeLists.txt
mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 89a45a4e4993f..d24d4d89e2493 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -110,6 +110,9 @@ class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
::llvm::cl::desc("Target architecture")};
Option<std::string> features{*this, "features",
::llvm::cl::desc("Target features")};
+ Option<int> optLevel{*this, "opt-level",
+ llvm::cl::desc("Optimization level for compilation"),
+ llvm::cl::init(2)};
Option<std::string> gpuBinaryAnnotation{
*this, "gpu-binary-annotation",
llvm::cl::desc("Annotation attribute string for GPU binary"),
@@ -130,10 +133,11 @@ void registerGpuSerializeToCubinPass();
void registerGpuSerializeToHsacoPass();
/// Create an instance of the GPU kernel function to CUBIN binary serialization
-/// pass.
+/// pass with optLevel (default level 2).
std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
StringRef chip,
- StringRef features);
+ StringRef features,
+ int optLevel = 2);
/// Create an instance of the GPU kernel function to HSAco binary serialization
/// pass.
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index ca163338f4237..4250e40eac491 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -61,6 +61,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
LINK_COMPONENTS
Core
MC
+ Target
${NVPTX_LIBS}
${AMDGPU_LIBS}
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
index d82e6ca2ba905..97aba90443675 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/ExecutionEngine/OptUtils.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
@@ -102,8 +103,24 @@ void gpu::SerializeToBlobPass::runOnOperation() {
LogicalResult
gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule,
llvm::TargetMachine &targetMachine) {
- // TODO: If serializeToCubin ends up defining optimizations, factor them
- // into here from SerializeToHsaco
+ int optLevel = this->optLevel.getValue();
+ if (optLevel < 0 || optLevel > 3)
+ return getOperation().emitError()
+ << "invalid optimization level " << optLevel;
+
+ targetMachine.setOptLevel(static_cast<llvm::CodeGenOpt::Level>(optLevel));
+
+ auto transformer =
+ makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine);
+ auto error = transformer(&llvmModule);
+ if (error) {
+ InFlightDiagnostic mlirError = getOperation()->emitError();
+ llvm::handleAllErrors(
+ std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) {
+ mlirError << "could not optimize LLVM IR: " << ei.message();
+ });
+ return mlirError;
+ }
return success();
}
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
index 44a14024e9fef..4df9e6d45470a 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
@@ -49,7 +49,8 @@ class SerializeToCubinPass
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass)
SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
- StringRef chip = "sm_35", StringRef features = "+ptx60");
+ StringRef chip = "sm_35", StringRef features = "+ptx60",
+ int optLevel = 2);
StringRef getArgument() const override { return "gpu-to-cubin"; }
StringRef getDescription() const override {
@@ -72,10 +73,12 @@ static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) {
}
SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
- StringRef features) {
+ StringRef features, int optLevel) {
maybeSetOption(this->triple, triple);
maybeSetOption(this->chip, chip);
maybeSetOption(this->features, features);
+ if (this->optLevel.getNumOccurrences() == 0)
+ this->optLevel.setValue(optLevel);
}
void SerializeToCubinPass::getDependentDialects(
@@ -147,8 +150,10 @@ void mlir::registerGpuSerializeToCubinPass() {
std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
StringRef arch,
- StringRef features) {
- return std::make_unique<SerializeToCubinPass>(triple, arch, features);
+ StringRef features,
+ int optLevel) {
+ return std::make_unique<SerializeToCubinPass>(triple, arch, features,
+ optLevel);
}
#else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
index 42cb2461395f1..108b8ab945fac 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
@@ -74,11 +74,6 @@ class SerializeToHsacoPass
}
protected:
- Option<int> optLevel{
- *this, "opt-level",
- llvm::cl::desc("Optimization level for HSACO compilation"),
- llvm::cl::init(2)};
-
Option<std::string> rocmPath{*this, "rocm-path",
llvm::cl::desc("Path to ROCm install")};
@@ -86,10 +81,6 @@ class SerializeToHsacoPass
std::unique_ptr<llvm::Module>
translateToLLVMIR(llvm::LLVMContext &llvmContext) override;
- /// Adds LLVM optimization passes
- LogicalResult optimizeLlvm(llvm::Module &llvmModule,
- llvm::TargetMachine &targetMachine) override;
-
private:
void getDependentDialects(DialectRegistry &registry) const override;
@@ -320,30 +311,6 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
return ret;
}
-LogicalResult
-SerializeToHsacoPass::optimizeLlvm(llvm::Module &llvmModule,
- llvm::TargetMachine &targetMachine) {
- int optLevel = this->optLevel.getValue();
- if (optLevel < 0 || optLevel > 3)
- return getOperation().emitError()
- << "Invalid HSA optimization level" << optLevel << "\n";
-
- targetMachine.setOptLevel(static_cast<llvm::CodeGenOpt::Level>(optLevel));
-
- auto transformer =
- makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine);
- auto error = transformer(&llvmModule);
- if (error) {
- InFlightDiagnostic mlirError = getOperation()->emitError();
- llvm::handleAllErrors(
- std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) {
- mlirError << "Could not optimize LLVM IR: " << ei.message() << "\n";
- });
- return mlirError;
- }
- return success();
-}
-
std::unique_ptr<SmallVectorImpl<char>>
SerializeToHsacoPass::assembleIsa(const std::string &isa) {
auto loc = getOperation().getLoc();