[Mlir-commits] [mlir] 1ef544d - [mlir] Remove mlir-cuda-runner
Christian Sigg
llvmlistbot at llvm.org
Fri Mar 12 05:06:51 PST 2021
Author: Christian Sigg
Date: 2021-03-12T14:06:43+01:00
New Revision: 1ef544d4a94ed4519e8bb86d60e0d52525f3eccd
URL: https://github.com/llvm/llvm-project/commit/1ef544d4a94ed4519e8bb86d60e0d52525f3eccd
DIFF: https://github.com/llvm/llvm-project/commit/1ef544d4a94ed4519e8bb86d60e0d52525f3eccd.diff
LOG: [mlir] Remove mlir-cuda-runner
Change CUDA integration tests to use mlir-opt + mlir-cpu-runner instead.
Depends On D98203
Reviewed By: herhut
Differential Revision: https://reviews.llvm.org/D98396
Added:
Modified:
mlir/test/CMakeLists.txt
mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
mlir/test/Integration/GPU/CUDA/async.mlir
mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir
mlir/test/Integration/GPU/CUDA/shuffle.mlir
mlir/test/Integration/GPU/CUDA/two-modules.mlir
mlir/tools/CMakeLists.txt
Removed:
mlir/tools/mlir-cuda-runner/CMakeLists.txt
mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
################################################################################
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
index 1c972d1eead8..e30cca13f92a 100644
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -88,12 +88,6 @@ if(LLVM_BUILD_EXAMPLES)
)
endif()
-if(MLIR_CUDA_RUNNER_ENABLED)
- list(APPEND MLIR_TEST_DEPENDS
- mlir-cuda-runner
- )
-endif()
-
if(MLIR_ROCM_RUNNER_ENABLED)
list(APPEND MLIR_TEST_DEPENDS
mlir-rocm-runner
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
index fca7c4bfb00c..da70bf61836e 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
index 7a848c5991ef..9edacf3c4099 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
index 2e9977ddb280..d88f2f2b43d4 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
index b7b0d4aebbf7..6910b511099e 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
index f970b4cee960..52b4ef5aadf7 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
index 925b0fe0db66..ea6987e99a95 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
index 55ea5689db30..a934f9696797 100644
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/async.mlir b/mlir/test/Integration/GPU/CUDA/async.mlir
index f5c1fd9fd0b1..c46400624d88 100644
--- a/mlir/test/Integration/GPU/CUDA/async.mlir
+++ b/mlir/test/Integration/GPU/CUDA/async.mlir
@@ -1,8 +1,9 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-async-region -async-ref-counting \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-async-region -async-ref-counting -gpu-to-llvm \
// RUN: -async-to-async-runtime -convert-async-to-llvm -convert-std-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
diff --git a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
index 6970c457c0fd..1cb56cd9ca04 100644
--- a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
+++ b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir
index e23472611f79..aaef634cbbd6 100644
--- a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir
+++ b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/Integration/GPU/CUDA/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir
index 1c1075debbef..97770100eb33 100644
--- a/mlir/test/Integration/GPU/CUDA/shuffle.mlir
+++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s \
// RUN: -gpu-kernel-outlining \
-// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{gpu-binary-annotation=nvvm.cubin})' \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
diff --git a/mlir/test/Integration/GPU/CUDA/two-modules.mlir b/mlir/test/Integration/GPU/CUDA/two-modules.mlir
index 61b42dcab009..4926218a32aa 100644
--- a/mlir/test/Integration/GPU/CUDA/two-modules.mlir
+++ b/mlir/test/Integration/GPU/CUDA/two-modules.mlir
@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/tools/CMakeLists.txt b/mlir/tools/CMakeLists.txt
index 3a60ae25548e..37793ce65ab1 100644
--- a/mlir/tools/CMakeLists.txt
+++ b/mlir/tools/CMakeLists.txt
@@ -1,4 +1,3 @@
-add_subdirectory(mlir-cuda-runner)
add_subdirectory(mlir-cpu-runner)
add_subdirectory(mlir-opt)
add_subdirectory(mlir-reduce)
diff --git a/mlir/tools/mlir-cuda-runner/CMakeLists.txt b/mlir/tools/mlir-cuda-runner/CMakeLists.txt
deleted file mode 100644
index be585b43b29e..000000000000
--- a/mlir/tools/mlir-cuda-runner/CMakeLists.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-set(LLVM_OPTIONAL_SOURCES
- mlir-cuda-runner.cpp
- )
-set(LLVM_LINK_COMPONENTS
- Core
- Support
-)
-
-if(MLIR_CUDA_RUNNER_ENABLED)
- if (NOT ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD))
- message(SEND_ERROR
- "Building the mlir cuda runner requires the NVPTX backend")
- endif()
-
- # Configure CUDA runner support. Using check_language first allows us to give
- # a custom error message.
- include(CheckLanguage)
- check_language(CUDA)
- if (CMAKE_CUDA_COMPILER)
- enable_language(CUDA)
- else()
- message(SEND_ERROR
- "Building the mlir cuda runner requires a working CUDA install")
- endif()
-
- # We need the libcuda.so library.
- find_library(CUDA_RUNTIME_LIBRARY cuda)
-
- get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
- set(LIBS
- ${conversion_libs}
- MLIRJitRunner
- MLIRAnalysis
- MLIRAsync
- MLIREDSC
- MLIRExecutionEngine
- MLIRGPU
- MLIRIR
- MLIRLLVMIR
- MLIRLLVMToLLVMIRTranslation
- MLIRNVVMIR
- MLIRParser
- MLIRStandard
- MLIRSupport
- MLIRTargetLLVMIRExport
- MLIRNVVMToLLVMIRTranslation
- MLIRTransforms
- MLIRTranslation
- ${CUDA_RUNTIME_LIBRARY}
- )
-
- # Manually expand the target library, since our MLIR libraries
- # aren't plugged into the LLVM dependency tracking. If we don't
- # do this then we can't insert the CodeGen library after ourselves
- llvm_expand_pseudo_components(TARGET_LIBS AllTargetsCodeGens)
- # Prepend LLVM in front of every target, this is how the library
- # are named with CMake
- SET(targets_to_link)
- FOREACH(t ${TARGET_LIBS})
- LIST(APPEND targets_to_link "LLVM${t}")
- ENDFOREACH(t)
-
- add_llvm_tool(mlir-cuda-runner
- mlir-cuda-runner.cpp
-
- DEPENDS
- mlir_cuda_runtime
- )
- target_include_directories(mlir-cuda-runner
- PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
- )
- llvm_update_compile_flags(mlir-cuda-runner)
- target_link_libraries(mlir-cuda-runner PRIVATE ${LIBS} ${targets_to_link})
-
-endif()
diff --git a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
deleted file mode 100644
index 541e1ba95990..000000000000
--- a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
+++ /dev/null
@@ -1,191 +0,0 @@
-//===- mlir-cuda-runner.cpp - MLIR CUDA Execution Driver-------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a command line utility that executes an MLIR file on the GPU by
-// translating MLIR to NVVM/LVVM IR before JIT-compiling and executing the
-// latter.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-
-#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
-#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
-#include "mlir/Conversion/Passes.h"
-#include "mlir/Dialect/Async/IR/Async.h"
-#include "mlir/Dialect/Async/Passes.h"
-#include "mlir/Dialect/GPU/GPUDialect.h"
-#include "mlir/Dialect/GPU/Passes.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/ExecutionEngine/JitRunner.h"
-#include "mlir/ExecutionEngine/OptUtils.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/Passes.h"
-
-#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/TargetSelect.h"
-
-#include "cuda.h"
-
-using namespace mlir;
-
-static void emitCudaError(const llvm::Twine &expr, const char *buffer,
- CUresult result, Location loc) {
- const char *error;
- cuGetErrorString(result, &error);
- emitError(loc, expr.concat(" failed with error code ")
- .concat(llvm::Twine{error})
- .concat("[")
- .concat(buffer)
- .concat("]"));
-}
-
-#define RETURN_ON_CUDA_ERROR(expr) \
- do { \
- if (auto status = (expr)) { \
- emitCudaError(#expr, jitErrorBuffer, status, loc); \
- return {}; \
- } \
- } while (false)
-
-OwnedBlob compilePtxToCubin(const std::string ptx, Location loc,
- StringRef name) {
- char jitErrorBuffer[4096] = {0};
-
- // Initialize CUDA once in a thread-safe manner.
- static CUresult cuInitResult = [] { return cuInit(/*flags=*/0); }();
- RETURN_ON_CUDA_ERROR(cuInitResult);
-
- // Linking requires a device context.
- CUdevice device;
- RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
- CUcontext context;
- RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
- CUlinkState linkState;
-
- CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
- CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
- void *jitOptionsVals[] = {jitErrorBuffer,
- reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
-
- RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */
- jitOptions, /* jit options */
- jitOptionsVals, /* jit option values */
- &linkState));
-
- RETURN_ON_CUDA_ERROR(
- cuLinkAddData(linkState, CUjitInputType::CU_JIT_INPUT_PTX,
- const_cast<void *>(static_cast<const void *>(ptx.c_str())),
- ptx.length(), name.str().data(), /* kernel name */
- 0, /* number of jit options */
- nullptr, /* jit options */
- nullptr /* jit option values */
- ));
-
- void *cubinData;
- size_t cubinSize;
- RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
-
- char *cubinAsChar = static_cast<char *>(cubinData);
- OwnedBlob result =
- std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
-
- // This will also destroy the cubin data.
- RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
- RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
-
- return result;
-}
-
-struct GpuToCubinPipelineOptions
- : public mlir::PassPipelineOptions<GpuToCubinPipelineOptions> {
- Option<std::string> gpuBinaryAnnotation{
- *this, "gpu-binary-annotation",
- llvm::cl::desc("Annotation attribute string for GPU binary"),
- llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())};
-};
-
-// Register cuda-runner specific passes.
-static void registerCudaRunnerPasses() {
- PassPipelineRegistration<GpuToCubinPipelineOptions> registerGpuToCubin(
- "gpu-to-cubin", "Generate CUBIN from gpu.launch regions",
- [&](OpPassManager &pm, const GpuToCubinPipelineOptions &options) {
- pm.addPass(createGpuKernelOutliningPass());
- auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
- kernelPm.addPass(createStripDebugInfoPass());
- kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
- kernelPm.addPass(createConvertGPUKernelToBlobPass(
- translateModuleToLLVMIR, compilePtxToCubin, "nvptx64-nvidia-cuda",
- "sm_35", "+ptx60", options.gpuBinaryAnnotation));
- });
- registerGPUPasses();
- registerGpuToLLVMConversionPassPass();
- registerAsyncPasses();
- registerConvertAsyncToLLVMPass();
- registerConvertStandardToLLVMPass();
-}
-
-static LogicalResult runMLIRPasses(ModuleOp module,
- PassPipelineCLParser &passPipeline) {
- PassManager pm(module.getContext(), PassManager::Nesting::Implicit);
- applyPassManagerCLOptions(pm);
-
- auto errorHandler = [&](const Twine &msg) {
- emitError(UnknownLoc::get(module.getContext())) << msg;
- return failure();
- };
-
- // Build the provided pipeline.
- if (failed(passPipeline.addToPipeline(pm, errorHandler)))
- return failure();
-
- // Run the pipeline.
- return pm.run(module);
-}
-
-int main(int argc, char **argv) {
- llvm::InitLLVM y(argc, argv);
- llvm::InitializeNativeTarget();
- llvm::InitializeNativeTargetAsmPrinter();
-
- // Initialize LLVM NVPTX backend.
- LLVMInitializeNVPTXTarget();
- LLVMInitializeNVPTXTargetInfo();
- LLVMInitializeNVPTXTargetMC();
- LLVMInitializeNVPTXAsmPrinter();
-
- mlir::initializeLLVMPasses();
-
- registerCudaRunnerPasses();
- PassPipelineCLParser passPipeline("", "Compiler passes to run");
- registerPassManagerCLOptions();
-
- auto mlirTransformer = [&](ModuleOp module) {
- return runMLIRPasses(module, passPipeline);
- };
-
- mlir::JitRunnerConfig jitRunnerConfig;
- jitRunnerConfig.mlirTransformer = mlirTransformer;
-
- mlir::DialectRegistry registry;
- registry.insert<mlir::LLVM::LLVMDialect, mlir::NVVM::NVVMDialect,
- mlir::async::AsyncDialect, mlir::gpu::GPUDialect,
- mlir::StandardOpsDialect>();
- mlir::registerLLVMDialectTranslation(registry);
- mlir::registerNVVMDialectTranslation(registry);
-
- return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig);
-}
More information about the Mlir-commits
mailing list