[Mlir-commits] [mlir] 211c975 - [mlir][NVVM] Adds the NVVM target attribute.
Fabian Mora
llvmlistbot at llvm.org
Tue Aug 8 12:21:45 PDT 2023
Author: Fabian Mora
Date: 2023-08-08T19:21:36Z
New Revision: 211c9752c8200fbb3ff7be1f6aa98037901758ce
URL: https://github.com/llvm/llvm-project/commit/211c9752c8200fbb3ff7be1f6aa98037901758ce
DIFF: https://github.com/llvm/llvm-project/commit/211c9752c8200fbb3ff7be1f6aa98037901758ce.diff
LOG: [mlir][NVVM] Adds the NVVM target attribute.
**For an explanation of these patches see D154153.**
Commit message:
This patch adds the NVVM target attribute for serializing GPU modules into
strings containing cubin.
Depends on D154113 and D154100 and D154097
Reviewed By: mehdi_amini
Differential Revision: https://reviews.llvm.org/D154117
Added:
mlir/include/mlir/Target/LLVM/NVVM/Target.h
mlir/include/mlir/Target/LLVM/NVVM/Utils.h
mlir/lib/Target/LLVM/NVVM/Target.cpp
mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
Modified:
mlir/CMakeLists.txt
mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
mlir/include/mlir/InitAllExtensions.h
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
mlir/lib/Target/LLVM/CMakeLists.txt
mlir/test/Dialect/GPU/ops.mlir
mlir/test/Dialect/LLVMIR/nvvm.mlir
mlir/unittests/Target/LLVM/CMakeLists.txt
Removed:
################################################################################
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index c91e9cd93dc8ee..fa4f6e76f985fb 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -97,7 +97,7 @@ endif()
# Build the CUDA conversions and run according tests if the NVPTX backend
# is available
-if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD AND MLIR_ENABLE_EXECUTION_ENGINE)
+if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
set(MLIR_ENABLE_CUDA_CONVERSIONS 1)
else()
set(MLIR_ENABLE_CUDA_CONVERSIONS 0)
@@ -118,6 +118,9 @@ set(MLIR_ENABLE_CUDA_RUNNER 0 CACHE BOOL "Enable building the mlir CUDA runner")
set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner")
set(MLIR_ENABLE_SPIRV_CPU_RUNNER 0 CACHE BOOL "Enable building the mlir SPIR-V cpu runner")
set(MLIR_ENABLE_VULKAN_RUNNER 0 CACHE BOOL "Enable building the mlir Vulkan runner")
+set(MLIR_ENABLE_NVPTXCOMPILER 0 CACHE BOOL
+ "Statically link the nvptxlibrary instead of calling ptxas as a subprocess \
+ for compiling PTX to cubin")
option(MLIR_INCLUDE_TESTS
"Generate build targets for the MLIR unit tests."
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
index 1644d0029380ce..13d86e9e665270 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
@@ -15,6 +15,7 @@
#define MLIR_DIALECT_LLVMIR_NVVMDIALECT_H_
#include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index c3cafe6b33c6c7..490a0db9baa028 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -14,6 +14,7 @@
#define NVVMIR_OPS
include "mlir/IR/EnumAttr.td"
+include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
@@ -1472,4 +1473,72 @@ def NVVM_WgmmaWaitGroupSyncOp : NVVM_Op<"wgmma.wait.group.sync.aligned",
}];
}
+//===----------------------------------------------------------------------===//
+// NVVM target attribute.
+//===----------------------------------------------------------------------===//
+
+def NVVM_TargettAttr : NVVM_Attr<"NVVMTarget", "target"> {
+ let description = [{
+ GPU target attribute for controlling compilation of NVIDIA targets. All
+ parameters decay into default values if not present.
+
+ Examples:
+
+ 1. Target with default values.
+ ```
+ gpu.module @mymodule [#nvvm.target] attributes {...} {
+ ...
+ }
+ ```
+
+ 2. Target with `sm_90` chip and fast math.
+ ```
+ gpu.module @mymodule [#nvvm.target<chip = "sm_90", flags = {fast}>] {
+ ...
+ }
+ ```
+ }];
+ let parameters = (ins
+ DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O,
+ StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple,
+ StringRefParameter<"Target chip.", "\"sm_50\"">:$chip,
+ StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features,
+ OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags,
+ OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link
+ );
+ let assemblyFormat = [{
+ (`<` struct($O, $triple, $chip, $features, $flags, $link)^ `>`)?
+ }];
+ let builders = [
+ AttrBuilder<(ins CArg<"int", "2">:$optLevel,
+ CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple,
+ CArg<"StringRef", "\"sm_50\"">:$chip,
+ CArg<"StringRef", "\"+ptx60\"">:$features,
+ CArg<"DictionaryAttr", "nullptr">:$targetFlags,
+ CArg<"ArrayAttr", "nullptr">:$linkFiles), [{
+ return Base::get($_ctxt, optLevel, triple, chip, features, targetFlags, linkFiles);
+ }]>
+ ];
+ let skipDefaultBuilders = 1;
+ let genVerifyDecl = 1;
+ let extraClassDeclaration = [{
+ bool hasFlag(StringRef flag) const;
+ bool hasFastMath() const;
+ bool hasFtz() const;
+ }];
+ let extraClassDefinition = [{
+ bool $cppClass::hasFlag(StringRef flag) const {
+ if (DictionaryAttr flags = getFlags())
+ return flags.get(flag) != nullptr;
+ return false;
+ }
+ bool $cppClass::hasFastMath() const {
+ return hasFlag("fast");
+ }
+ bool $cppClass::hasFtz() const {
+ return hasFlag("ftz");
+ }
+ }];
+}
+
#endif // NVVMIR_OPS
diff --git a/mlir/include/mlir/InitAllExtensions.h b/mlir/include/mlir/InitAllExtensions.h
index 45e360e8666ec2..14fc94fc86cd9f 100644
--- a/mlir/include/mlir/InitAllExtensions.h
+++ b/mlir/include/mlir/InitAllExtensions.h
@@ -16,6 +16,7 @@
#include "mlir/Conversion/NVVMToLLVM/NVVMToLLVM.h"
#include "mlir/Dialect/Func/Extensions/AllExtensions.h"
+#include "mlir/Target/LLVM/NVVM/Target.h"
#include <cstdlib>
@@ -29,6 +30,7 @@ namespace mlir {
inline void registerAllExtensions(DialectRegistry ®istry) {
func::registerAllExtensions(registry);
registerConvertNVVMToLLVMInterface(registry);
+ registerNVVMTarget(registry);
}
} // namespace mlir
diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Target.h b/mlir/include/mlir/Target/LLVM/NVVM/Target.h
new file mode 100644
index 00000000000000..ab011d34d6cb22
--- /dev/null
+++ b/mlir/include/mlir/Target/LLVM/NVVM/Target.h
@@ -0,0 +1,28 @@
+//===- Target.h - MLIR NVVM target registration -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides registration calls for attaching the NVVM target interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_LLVM_NVVM_TARGET_H
+#define MLIR_TARGET_LLVM_NVVM_TARGET_H
+
+namespace mlir {
+class DialectRegistry;
+class MLIRContext;
+/// Registers the `TargetAttrInterface` for the `#nvvm.target` attribute in the
+/// given registry.
+void registerNVVMTarget(DialectRegistry ®istry);
+
+/// Registers the `TargetAttrInterface` for the `#nvvm.target` attribute in the
+/// registry associated with the given context.
+void registerNVVMTarget(MLIRContext &context);
+} // namespace mlir
+
+#endif // MLIR_TARGET_LLVM_NVVM_TARGET_H
diff --git a/mlir/include/mlir/Target/LLVM/NVVM/Utils.h b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h
new file mode 100644
index 00000000000000..d5926d15484722
--- /dev/null
+++ b/mlir/include/mlir/Target/LLVM/NVVM/Utils.h
@@ -0,0 +1,74 @@
+//===- Utils.h - MLIR NVVM target utils -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This files declares NVVM target related utility classes and functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_LLVM_NVVM_UTILS_H
+#define MLIR_TARGET_LLVM_NVVM_UTILS_H
+
+#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/Target/LLVM/ModuleToObject.h"
+
+namespace mlir {
+namespace NVVM {
+/// Searches & returns the path CUDA toolkit path, the search order is:
+/// 1. The `CUDA_ROOT` environment variable.
+/// 2. The `CUDA_HOME` environment variable.
+/// 3. The `CUDA_PATH` environment variable.
+/// 4. The CUDA toolkit path detected by CMake.
+/// 5. Returns an empty string.
+StringRef getCUDAToolkitPath();
+
+/// Base class for all NVVM serializations from GPU modules into binary strings.
+/// By default this class serializes into LLVM bitcode.
+class SerializeGPUModuleBase : public LLVM::ModuleToObject {
+public:
+ /// Initializes the `toolkitPath` with the path in `targetOptions` or if empty
+ /// with the path in `getCUDAToolkitPath`.
+ SerializeGPUModuleBase(Operation &module, NVVMTargetAttr target,
+ const gpu::TargetOptions &targetOptions = {});
+
+ /// Initializes the LLVM NVPTX target by safely calling `LLVMInitializeNVPTX*`
+ /// methods if available.
+ static void init();
+
+ /// Returns the target attribute.
+ NVVMTargetAttr getTarget() const;
+
+ /// Returns the CUDA toolkit path.
+ StringRef getToolkitPath() const;
+
+ /// Returns the bitcode files to be loaded.
+ ArrayRef<std::string> getFileList() const;
+
+ /// Appends `nvvm/libdevice.bc` into `fileList`. Returns failure if the
+ /// library couldn't be found.
+ LogicalResult appendStandardLibs();
+
+ /// Loads the bitcode files in `fileList`.
+ virtual std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
+ loadBitcodeFiles(llvm::Module &module,
+ llvm::TargetMachine &targetMachine) override;
+
+protected:
+ /// NVVM target attribute.
+ NVVMTargetAttr target;
+
+ /// CUDA toolkit path.
+ std::string toolkitPath;
+
+ /// List of LLVM bitcode files to link to.
+ SmallVector<std::string> fileList;
+};
+} // namespace NVVM
+} // namespace mlir
+
+#endif // MLIR_TARGET_LLVM_NVVM_UTILS_H
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 4c1f92983887ba..f086af6b74b1b0 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -17,6 +17,7 @@
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Conversion/ConvertToLLVM/ToLLVMInterface.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
@@ -723,6 +724,7 @@ void NVVMDialect::initialize() {
// registered.
allowUnknownOperations();
declarePromisedInterface<ConvertToLLVMPatternInterface>();
+ declarePromisedInterface<gpu::TargetAttrInterface>();
}
LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op,
@@ -761,6 +763,35 @@ LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op,
return success();
}
+//===----------------------------------------------------------------------===//
+// NVVM target attribute.
+//===----------------------------------------------------------------------===//
+LogicalResult
+NVVMTargetAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+ int optLevel, StringRef triple, StringRef chip,
+ StringRef features, DictionaryAttr flags,
+ ArrayAttr files) {
+ if (optLevel < 0 || optLevel > 3) {
+ emitError() << "The optimization level must be a number between 0 and 3.";
+ return failure();
+ }
+ if (triple.empty()) {
+ emitError() << "The target triple cannot be empty.";
+ return failure();
+ }
+ if (chip.empty()) {
+ emitError() << "The target chip cannot be empty.";
+ return failure();
+ }
+ if (files && !llvm::all_of(files, [](::mlir::Attribute attr) {
+ return attr && mlir::isa<StringAttr>(attr);
+ })) {
+ emitError() << "All the elements in the `link` array must be strings.";
+ return failure();
+ }
+ return success();
+}
+
#define GET_OP_CLASSES
#include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc"
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 728c619b51a7a9..5f311d084e43f1 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -20,3 +20,80 @@ add_mlir_library(MLIRTargetLLVM
MLIRExecutionEngineUtils
MLIRTargetLLVMIRExport
)
+
+if (MLIR_ENABLE_CUDA_CONVERSIONS)
+ set(NVPTX_LIBS
+ NVPTXCodeGen
+ NVPTXDesc
+ NVPTXInfo
+ )
+endif()
+
+add_mlir_dialect_library(MLIRNVVMTarget
+ NVVM/Target.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/LLVMIR
+
+ LINK_COMPONENTS
+ ${NVPTX_LIBS}
+
+ LINK_LIBS PUBLIC
+ MLIRIR
+ MLIRExecutionEngineUtils
+ MLIRSupport
+ MLIRGPUDialect
+ MLIRTargetLLVM
+ MLIRNVVMToLLVMIRTranslation
+ )
+
+if(MLIR_ENABLE_CUDA_CONVERSIONS)
+ # Find the CUDA toolkit.
+ find_package(CUDAToolkit)
+
+ if(CUDAToolkit_FOUND)
+ # Get the CUDA toolkit path. The path is needed for detecting `libdevice.bc`.
+ # These extra steps are needed because of a bug on CMake.
+ # See: https://gitlab.kitware.com/cmake/cmake/-/issues/24858
+ # TODO: Bump the MLIR CMake version to 3.26.4 and switch to
+ # ${CUDAToolkit_LIBRARY_ROOT}
+ if(NOT DEFINED ${CUDAToolkit_LIBRARY_ROOT})
+ get_filename_component(MLIR_CUDAToolkit_ROOT ${CUDAToolkit_BIN_DIR}
+ DIRECTORY ABSOLUTE)
+ else()
+ set(MLIR_CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_ROOT})
+ endif()
+
+ # Add the `nvptxcompiler` library.
+ if(MLIR_ENABLE_NVPTXCOMPILER)
+ # Find the `nvptxcompiler` library.
+ # TODO: Bump the MLIR CMake version to 3.25 and use `CUDA::nvptxcompiler_static`.
+ find_library(MLIR_NVPTXCOMPILER_LIB nvptxcompiler_static
+ PATHS ${CUDAToolkit_LIBRARY_DIR} NO_DEFAULT_PATH)
+
+ # Fail if `nvptxcompiler_static` couldn't be found.
+ if(MLIR_NVPTXCOMPILER_LIB STREQUAL "MLIR_NVPTXCOMPILER_LIB-NOTFOUND")
+ message(FATAL_ERROR
+ "Requested using the `nvptxcompiler` library backend but it couldn't be found.")
+ endif()
+
+ # Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`.
+ target_link_libraries(MLIRNVVMTarget PRIVATE ${MLIR_NVPTXCOMPILER_LIB})
+ target_include_directories(obj.MLIRNVVMTarget PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
+ endif()
+ else()
+ # Fail if `MLIR_ENABLE_NVPTXCOMPILER` is enabled and the toolkit couldn't be found.
+ if(MLIR_ENABLE_NVPTXCOMPILER)
+ message(FATAL_ERROR
+ "Requested using the `nvptxcompiler` library backend but it couldn't be found.")
+ endif()
+ endif()
+ message(VERBOSE "MLIR default CUDA toolkit path: ${MLIR_CUDAToolkit_ROOT}")
+
+ # Define the `CUDAToolkit` path.
+ target_compile_definitions(obj.MLIRNVVMTarget
+ PRIVATE
+ MLIR_NVPTXCOMPILER_ENABLED=${MLIR_ENABLE_NVPTXCOMPILER}
+ __DEFAULT_CUDATOOLKIT_PATH__="${MLIR_CUDAToolkit_ROOT}"
+ )
+endif()
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
new file mode 100644
index 00000000000000..5f56eea8ce4925
--- /dev/null
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -0,0 +1,508 @@
+//===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This files defines NVVM target related functions including registration
+// calls for the `#nvvm.target` compilation attribute.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/LLVM/NVVM/Target.h"
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/Target/LLVM/NVVM/Utils.h"
+#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Export.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include <cstdlib>
+
+using namespace mlir;
+using namespace mlir::NVVM;
+
+#ifndef __DEFAULT_CUDATOOLKIT_PATH__
+#define __DEFAULT_CUDATOOLKIT_PATH__ ""
+#endif
+
+namespace {
+// Implementation of the `TargetAttrInterface` model.
+class NVVMTargetAttrImpl
+ : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
+public:
+ std::optional<SmallVector<char, 0>>
+ serializeToObject(Attribute attribute, Operation *module,
+ const gpu::TargetOptions &options) const;
+};
+} // namespace
+
+// Register the NVVM dialect, the NVVM translation & the target interface.
+void mlir::registerNVVMTarget(DialectRegistry ®istry) {
+ registerNVVMDialectTranslation(registry);
+ registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
+ NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
+ });
+}
+
+void mlir::registerNVVMTarget(MLIRContext &context) {
+ DialectRegistry registry;
+ registerNVVMTarget(registry);
+ context.appendDialectRegistry(registry);
+}
+
+// Search for the CUDA toolkit path.
+StringRef mlir::NVVM::getCUDAToolkitPath() {
+ if (const char *var = std::getenv("CUDA_ROOT"))
+ return var;
+ if (const char *var = std::getenv("CUDA_HOME"))
+ return var;
+ if (const char *var = std::getenv("CUDA_PATH"))
+ return var;
+ return __DEFAULT_CUDATOOLKIT_PATH__;
+}
+
+SerializeGPUModuleBase::SerializeGPUModuleBase(
+ Operation &module, NVVMTargetAttr target,
+ const gpu::TargetOptions &targetOptions)
+ : ModuleToObject(module, target.getTriple(), target.getChip(),
+ target.getFeatures(), target.getO()),
+ target(target), toolkitPath(targetOptions.getToolkitPath()),
+ fileList(targetOptions.getLinkFiles()) {
+
+ // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
+ if (toolkitPath.empty())
+ toolkitPath = getCUDAToolkitPath();
+
+ // Append the files in the target attribute.
+ if (ArrayAttr files = target.getLink())
+ for (Attribute attr : files.getValue())
+ if (auto file = dyn_cast<StringAttr>(attr))
+ fileList.push_back(file.str());
+
+ // Append libdevice to the files to be loaded.
+ (void)appendStandardLibs();
+}
+
+void SerializeGPUModuleBase::init() {
+ static llvm::once_flag initializeBackendOnce;
+ llvm::call_once(initializeBackendOnce, []() {
+ // If the `NVPTX` LLVM target was built, initialize it.
+#if MLIR_CUDA_CONVERSIONS_ENABLED == 1
+ LLVMInitializeNVPTXTarget();
+ LLVMInitializeNVPTXTargetInfo();
+ LLVMInitializeNVPTXTargetMC();
+ LLVMInitializeNVPTXAsmPrinter();
+#endif
+ });
+}
+
+NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
+
+StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
+
+ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
+ return fileList;
+}
+
+// Try to append `libdevice` from a CUDA toolkit installation.
+LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
+ StringRef pathRef = getToolkitPath();
+ if (pathRef.size()) {
+ SmallVector<char, 256> path;
+ path.insert(path.begin(), pathRef.begin(), pathRef.end());
+ pathRef = StringRef(path.data(), path.size());
+ if (!llvm::sys::fs::is_directory(pathRef)) {
+ getOperation().emitError() << "CUDA path: " << pathRef
+ << " does not exist or is not a directory.\n";
+ return failure();
+ }
+ llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
+ pathRef = StringRef(path.data(), path.size());
+ if (!llvm::sys::fs::is_regular_file(pathRef)) {
+ getOperation().emitError() << "LibDevice path: " << pathRef
+ << " does not exist or is not a file.\n";
+ return failure();
+ }
+ fileList.push_back(pathRef.str());
+ }
+ return success();
+}
+
+std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
+SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module,
+ llvm::TargetMachine &targetMachine) {
+ SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
+ if (failed(loadBitcodeFilesFromList(module.getContext(), targetMachine,
+ fileList, bcFiles, true)))
+ return std::nullopt;
+ return bcFiles;
+}
+
+#if MLIR_CUDA_CONVERSIONS_ENABLED == 1
+namespace {
+class NVPTXSerializer : public SerializeGPUModuleBase {
+public:
+ NVPTXSerializer(Operation &module, NVVMTargetAttr target,
+ const gpu::TargetOptions &targetOptions);
+
+ gpu::GPUModuleOp getOperation();
+
+ // Compile PTX to cubin using `ptxas`.
+ std::optional<SmallVector<char, 0>>
+ compileToBinary(const std::string &ptxCode);
+
+ // Compile PTX to cubin using the `nvptxcompiler` library.
+ std::optional<SmallVector<char, 0>>
+ compileToBinaryNVPTX(const std::string &ptxCode);
+
+ std::optional<SmallVector<char, 0>>
+ moduleToObject(llvm::Module &llvmModule,
+ llvm::TargetMachine &targetMachine) override;
+
+private:
+ using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
+
+ // Create a temp file.
+ std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);
+
+ // Find the PTXAS compiler. The search order is:
+ // 1. The toolkit path in `targetOptions`.
+ // 2. In the system PATH.
+ // 3. The path from `getCUDAToolkitPath()`.
+ std::optional<std::string> findPtxas() const;
+
+ // Target options.
+ gpu::TargetOptions targetOptions;
+};
+} // namespace
+
+NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
+ const gpu::TargetOptions &targetOptions)
+ : SerializeGPUModuleBase(module, target, targetOptions),
+ targetOptions(targetOptions) {}
+
+std::optional<NVPTXSerializer::TmpFile>
+NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
+ llvm::SmallString<128> filename;
+ std::error_code ec =
+ llvm::sys::fs::createTemporaryFile(name, suffix, filename);
+ if (ec) {
+ getOperation().emitError() << "Couldn't create the temp file: `" << filename
+ << "`, error message: " << ec.message();
+ return std::nullopt;
+ }
+ return TmpFile(filename, llvm::FileRemover(filename.c_str()));
+}
+
+gpu::GPUModuleOp NVPTXSerializer::getOperation() {
+ return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
+}
+
+std::optional<std::string> NVPTXSerializer::findPtxas() const {
+ // Find the `ptxas` compiler.
+ // 1. Check the toolkit path given in the command line.
+ StringRef pathRef = targetOptions.getToolkitPath();
+ SmallVector<char, 256> path;
+ if (pathRef.size()) {
+ path.insert(path.begin(), pathRef.begin(), pathRef.end());
+ llvm::sys::path::append(path, "bin", "ptxas");
+ if (llvm::sys::fs::can_execute(path))
+ return StringRef(path.data(), path.size()).str();
+ }
+
+ // 2. Check PATH.
+ if (std::optional<std::string> ptxasCompiler =
+ llvm::sys::Process::FindInEnvPath("PATH", "ptxas"))
+ return *ptxasCompiler;
+
+ // 3. Check `getCUDAToolkitPath()`.
+ pathRef = getCUDAToolkitPath();
+ path.clear();
+ if (pathRef.size()) {
+ path.insert(path.begin(), pathRef.begin(), pathRef.end());
+ llvm::sys::path::append(path, "bin", "ptxas");
+ if (llvm::sys::fs::can_execute(path))
+ return StringRef(path.data(), path.size()).str();
+ }
+ return std::nullopt;
+}
+
+// TODO: clean this method & have a generic tool driver or never emit binaries
+// with this mechanism and let another stage take care of it.
+std::optional<SmallVector<char, 0>>
+NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
+ // Find the PTXAS compiler.
+ std::optional<std::string> ptxasCompiler = findPtxas();
+ if (!ptxasCompiler) {
+ getOperation().emitError()
+ << "Couldn't find the `ptxas` compiler. Please specify the toolkit "
+ "path, add the compiler to $PATH, or set one of the environment "
+ "variables in `NVVM::getCUDAToolkitPath()`.";
+ return std::nullopt;
+ }
+
+ // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
+ std::string basename =
+ llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
+ getTarget().getTriple(), getTarget().getChip());
+
+ // Create temp files:
+ std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
+ if (!ptxFile)
+ return std::nullopt;
+ std::optional<TmpFile> logFile = createTemp(basename, "log");
+ if (!logFile)
+ return std::nullopt;
+ std::optional<TmpFile> cubinFile = createTemp(basename, "cubin");
+ if (!cubinFile)
+ return std::nullopt;
+
+ std::error_code ec;
+ // Dump the PTX to a temp file.
+ {
+ llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
+ if (ec) {
+ getOperation().emitError()
+ << "Couldn't open the file: `" << ptxFile->first
+ << "`, error message: " << ec.message();
+ return std::nullopt;
+ }
+ ptxStream << ptxCode;
+ if (ptxStream.has_error()) {
+ getOperation().emitError()
+ << "An error occurred while writing the PTX to: `" << ptxFile->first
+ << "`.";
+ return std::nullopt;
+ }
+ ptxStream.flush();
+ }
+
+ // Create PTX args.
+ std::string optLevel = std::to_string(this->optLevel);
+ SmallVector<StringRef, 12> ptxasArgs(
+ {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
+ StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile->first),
+ "--opt-level", optLevel});
+
+ std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
+ targetOptions.tokenizeCmdOptions();
+ for (auto arg : cmdOpts.second)
+ ptxasArgs.push_back(arg);
+
+ std::optional<StringRef> redirects[] = {
+ std::nullopt,
+ logFile->first,
+ logFile->first,
+ };
+
+ // Invoke PTXAS.
+ std::string message;
+ if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
+ /*Env=*/std::nullopt,
+ /*Redirects=*/redirects,
+ /*SecondsToWait=*/0,
+ /*MemoryLimit=*/0,
+ /*ErrMsg=*/&message)) {
+ if (message.empty()) {
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ptxasStderr =
+ llvm::MemoryBuffer::getFile(logFile->first);
+ if (ptxasStderr)
+ getOperation().emitError() << "PTXAS invocation failed. PTXAS log:\n"
+ << ptxasStderr->get()->getBuffer();
+ else
+ getOperation().emitError() << "PTXAS invocation failed.";
+ return std::nullopt;
+ }
+ getOperation().emitError()
+ << "PTXAS invocation failed, error message: " << message;
+ return std::nullopt;
+ }
+
+// Dump the output of PTXAS, helpful if the verbose flag was passed.
+#define DEBUG_TYPE "serialize-to-binary"
+ LLVM_DEBUG({
+ llvm::dbgs() << "PTXAS invocation for module: "
+ << getOperation().getNameAttr() << "\n";
+ llvm::dbgs() << "Command: ";
+ llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
+ llvm::dbgs() << "\n";
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ptxasLog =
+ llvm::MemoryBuffer::getFile(logFile->first);
+ if (ptxasLog && (*ptxasLog)->getBuffer().size()) {
+ llvm::dbgs() << "Output:\n" << (*ptxasLog)->getBuffer() << "\n";
+ llvm::dbgs().flush();
+ }
+ });
+#undef DEBUG_TYPE
+
+ // Read the cubin file.
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> cubinBuffer =
+ llvm::MemoryBuffer::getFile(cubinFile->first);
+ if (!cubinBuffer) {
+ getOperation().emitError()
+ << "Couldn't open the file: `" << cubinFile->first
+ << "`, error message: " << cubinBuffer.getError().message();
+ return std::nullopt;
+ }
+ StringRef cubinStr = (*cubinBuffer)->getBuffer();
+ return SmallVector<char, 0>(cubinStr.begin(), cubinStr.end());
+}
+
+#if MLIR_NVPTXCOMPILER_ENABLED == 1
+#include "nvPTXCompiler.h"
+
+#define RETURN_ON_NVPTXCOMPILER_ERROR(expr) \
+ do { \
+ if (auto status = (expr)) { \
+ emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ") \
+ << status; \
+ return std::nullopt; \
+ } \
+ } while (false)
+
+std::optional<SmallVector<char, 0>>
+NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
+ Location loc = getOperation().getLoc();
+ nvPTXCompilerHandle compiler = nullptr;
+ nvPTXCompileResult status;
+ size_t logSize;
+
+ // Create the options.
+ std::string optLevel = std::to_string(this->optLevel);
+ std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
+ targetOptions.tokenizeCmdOptions();
+ cmdOpts.second.append(
+ {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
+
+ // Create the compiler handle.
+ RETURN_ON_NVPTXCOMPILER_ERROR(
+ nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));
+
+ // Try to compile the binary.
+ status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
+ cmdOpts.second.data());
+
+ // Check if compilation failed.
+ if (status != NVPTXCOMPILE_SUCCESS) {
+ RETURN_ON_NVPTXCOMPILER_ERROR(
+ nvPTXCompilerGetErrorLogSize(compiler, &logSize));
+ if (logSize != 0) {
+ SmallVector<char> log(logSize + 1, 0);
+ RETURN_ON_NVPTXCOMPILER_ERROR(
+ nvPTXCompilerGetErrorLog(compiler, log.data()));
+ emitError(loc) << "NVPTX compiler invocation failed, error log: "
+ << log.data();
+ } else
+ emitError(loc) << "NVPTX compiler invocation failed with error code: "
+ << status;
+ return std::nullopt;
+ }
+
+ // Retrieve the binary.
+ size_t elfSize;
+ RETURN_ON_NVPTXCOMPILER_ERROR(
+ nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
+ SmallVector<char, 0> binary(elfSize, 0);
+ RETURN_ON_NVPTXCOMPILER_ERROR(
+ nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));
+
+// Dump the log of the compiler, helpful if the verbose flag was passed.
+#define DEBUG_TYPE "serialize-to-binary"
+ LLVM_DEBUG({
+ RETURN_ON_NVPTXCOMPILER_ERROR(
+ nvPTXCompilerGetInfoLogSize(compiler, &logSize));
+ if (logSize != 0) {
+ SmallVector<char> log(logSize + 1, 0);
+ RETURN_ON_NVPTXCOMPILER_ERROR(
+ nvPTXCompilerGetInfoLog(compiler, log.data()));
+ llvm::dbgs() << "NVPTX compiler invocation for module: "
+ << getOperation().getNameAttr() << "\n";
+ llvm::dbgs() << "Arguments: ";
+ llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
+ llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
+ llvm::dbgs().flush();
+ }
+ });
+#undef DEBUG_TYPE
+ RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
+ return binary;
+}
+#endif // MLIR_NVPTXCOMPILER_ENABLED == 1
+
+std::optional<SmallVector<char, 0>>
+NVPTXSerializer::moduleToObject(llvm::Module &llvmModule,
+ llvm::TargetMachine &targetMachine) {
+ // Return LLVM IR if the compilation target is offload.
+#define DEBUG_TYPE "serialize-to-llvm"
+ LLVM_DEBUG({
+ llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
+ << "\n";
+ llvm::dbgs() << llvmModule << "\n";
+ llvm::dbgs().flush();
+ });
+#undef DEBUG_TYPE
+ if (targetOptions.getCompilationTarget() == gpu::TargetOptions::offload)
+ return SerializeGPUModuleBase::moduleToObject(llvmModule, targetMachine);
+
+ // Emit PTX code.
+ std::optional<std::string> serializedISA =
+ translateToISA(llvmModule, targetMachine);
+ if (!serializedISA) {
+ getOperation().emitError() << "Failed translating the module to ISA.";
+ return std::nullopt;
+ }
+#define DEBUG_TYPE "serialize-to-isa"
+ LLVM_DEBUG({
+ llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
+ llvm::dbgs() << *serializedISA << "\n";
+ llvm::dbgs().flush();
+ });
+#undef DEBUG_TYPE
+
+ // Return PTX if the compilation target is assembly.
+ if (targetOptions.getCompilationTarget() == gpu::TargetOptions::assembly)
+ return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());
+
+ // Compile to binary.
+#if MLIR_NVPTXCOMPILER_ENABLED == 1
+ return compileToBinaryNVPTX(*serializedISA);
+#else
+ return compileToBinary(*serializedISA);
+#endif // MLIR_NVPTXCOMPILER_ENABLED == 1
+}
+#endif // MLIR_CUDA_CONVERSIONS_ENABLED == 1
+
+std::optional<SmallVector<char, 0>>
+NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
+ const gpu::TargetOptions &options) const {
+ assert(module && "The module must be non null.");
+ if (!module)
+ return std::nullopt;
+ if (!mlir::isa<gpu::GPUModuleOp>(module)) {
+ module->emitError("Module must be a GPU module.");
+ return std::nullopt;
+ }
+#if MLIR_CUDA_CONVERSIONS_ENABLED == 1
+ NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
+ serializer.init();
+ return serializer.run();
+#else
+ module->emitError(
+ "The `NVPTX` target was not built. Please enable it when building LLVM.");
+ return std::nullopt;
+#endif // MLIR_CUDA_CONVERSIONS_ENABLED == 1
+}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 0c78e5a2d665d9..4ae39816693602 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -364,3 +364,10 @@ gpu.module @module {
gpu.return
}) {function_type = () -> (), sym_name = "func"} : () -> ()
}
+
+// Check that this doesn't crash.
+gpu.module @module_with_one_target [#nvvm.target] {
+ gpu.func @kernel(%arg0 : f32) kernel {
+ gpu.return
+ }
+}
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
index b26f3b02658ffd..6dce8eafc29d54 100644
--- a/mlir/test/Dialect/LLVMIR/nvvm.mlir
+++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -429,3 +429,12 @@ func.func @wgmma_wait_group_sync_aligned() {
nvvm.wgmma.wait.group.sync.aligned 0
return
}
+
+// -----
+
+// Just check these don't emit errors.
+gpu.module @module_1 [#nvvm.target<chip = "sm_90", features = "+ptx70", link = ["my_device_lib.bc"], flags = {fast, ftz}>] {
+}
+
+gpu.module @module_2 [#nvvm.target<chip = "sm_90">, #nvvm.target<chip = "sm_80">, #nvvm.target<chip = "sm_70">] {
+}
diff --git a/mlir/unittests/Target/LLVM/CMakeLists.txt b/mlir/unittests/Target/LLVM/CMakeLists.txt
index cae5b6ca221970..6b6e117df58f3d 100644
--- a/mlir/unittests/Target/LLVM/CMakeLists.txt
+++ b/mlir/unittests/Target/LLVM/CMakeLists.txt
@@ -1,4 +1,5 @@
add_mlir_unittest(MLIRTargetLLVMTests
+ SerializeNVVMTarget.cpp
SerializeToLLVMBitcode.cpp
)
@@ -7,9 +8,14 @@ llvm_map_components_to_libnames(llvm_libs nativecodegen)
target_link_libraries(MLIRTargetLLVMTests
PRIVATE
MLIRTargetLLVM
+ MLIRNVVMTarget
+ MLIRGPUDialect
+ MLIRNVVMDialect
MLIRLLVMDialect
MLIRLLVMToLLVMIRTranslation
MLIRBuiltinToLLVMIRTranslation
+ MLIRNVVMToLLVMIRTranslation
+ MLIRGPUToLLVMIRTranslation
${llvm_libs}
)
diff --git a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
new file mode 100644
index 00000000000000..7d95840fb9a52b
--- /dev/null
+++ b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
@@ -0,0 +1,154 @@
+//===- SerializeNVVMTarget.cpp ----------------------------------*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/InitAllDialects.h"
+#include "mlir/Parser/Parser.h"
+#include "mlir/Target/LLVM/NVVM/Target.h"
+#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
+
+#include "gmock/gmock.h"
+
+using namespace mlir;
+
+// Skip the test if the NVPTX target was not built.
+#if MLIR_CUDA_CONVERSIONS_ENABLED == 0
+#define SKIP_WITHOUT_NVPTX(x) DISABLED_##x
+#else
+#define SKIP_WITHOUT_NVPTX(x) x
+#endif
+
+class MLIRTargetLLVMNVVM : public ::testing::Test {
+protected:
+ virtual void SetUp() {
+ registerBuiltinDialectTranslation(registry);
+ registerLLVMDialectTranslation(registry);
+ registerGPUDialectTranslation(registry);
+ registerNVVMTarget(registry);
+ }
+
+ // Checks if PTXAS is in PATH.
+ bool hasPtxas() {
+ // Find the `ptxas` compiler.
+ std::optional<std::string> ptxasCompiler =
+ llvm::sys::Process::FindInEnvPath("PATH", "ptxas");
+ return ptxasCompiler.has_value();
+ }
+
+ // Dialect registry.
+ DialectRegistry registry;
+
+ // MLIR module used for the tests.
+ const std::string moduleStr = R"mlir(
+ gpu.module @nvvm_test {
+ llvm.func @nvvm_kernel(%arg0: f32) attributes {gpu.kernel, nvvm.kernel} {
+ llvm.return
+ }
+ })mlir";
+};
+
+// Test NVVM serialization to LLVM.
+TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMMToLLVM)) {
+ MLIRContext context(registry);
+
+ OwningOpRef<ModuleOp> module =
+ parseSourceString<ModuleOp>(moduleStr, &context);
+ ASSERT_TRUE(!!module);
+
+ // Create an NVVM target.
+ NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context);
+
+ // Serialize the module.
+ auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+ ASSERT_TRUE(!!serializer);
+ gpu::TargetOptions options("", {}, "", gpu::TargetOptions::offload);
+ for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+ std::optional<SmallVector<char, 0>> object =
+ serializer.serializeToObject(gpuModule, options);
+ // Check that the serializer was successful.
+ ASSERT_TRUE(object != std::nullopt);
+ ASSERT_TRUE(object->size() > 0);
+
+ // Read the serialized module.
+ llvm::MemoryBufferRef buffer(StringRef(object->data(), object->size()),
+ "module");
+ llvm::LLVMContext llvmContext;
+ llvm::Expected<std::unique_ptr<llvm::Module>> llvmModule =
+ llvm::getLazyBitcodeModule(buffer, llvmContext);
+ ASSERT_TRUE(!!llvmModule);
+ ASSERT_TRUE(!!*llvmModule);
+
+ // Check that it has a function named `foo`.
+ ASSERT_TRUE((*llvmModule)->getFunction("nvvm_kernel") != nullptr);
+ }
+}
+
+// Test NVVM serialization to PTX.
+TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToPTX)) {
+ MLIRContext context(registry);
+
+ OwningOpRef<ModuleOp> module =
+ parseSourceString<ModuleOp>(moduleStr, &context);
+ ASSERT_TRUE(!!module);
+
+ // Create an NVVM target.
+ NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context);
+
+ // Serialize the module.
+ auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+ ASSERT_TRUE(!!serializer);
+ gpu::TargetOptions options("", {}, "", gpu::TargetOptions::assembly);
+ for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+ std::optional<SmallVector<char, 0>> object =
+ serializer.serializeToObject(gpuModule, options);
+ // Check that the serializer was successful.
+ ASSERT_TRUE(object != std::nullopt);
+ ASSERT_TRUE(object->size() > 0);
+
+ ASSERT_TRUE(
+ StringRef(object->data(), object->size()).contains("nvvm_kernel"));
+ }
+}
+
+// Test NVVM serialization to Binary.
+TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToBinary)) {
+ if (!hasPtxas())
+ GTEST_SKIP() << "PTXAS compiler not found, skipping test.";
+
+ MLIRContext context(registry);
+
+ OwningOpRef<ModuleOp> module =
+ parseSourceString<ModuleOp>(moduleStr, &context);
+ ASSERT_TRUE(!!module);
+
+ // Create an NVVM target.
+ NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context);
+
+ // Serialize the module.
+ auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+ ASSERT_TRUE(!!serializer);
+ gpu::TargetOptions options("", {}, "", gpu::TargetOptions::binary);
+ for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+ std::optional<SmallVector<char, 0>> object =
+ serializer.serializeToObject(gpuModule, options);
+ // Check that the serializer was successful.
+ ASSERT_TRUE(object != std::nullopt);
+ ASSERT_TRUE(object->size() > 0);
+ }
+}
More information about the Mlir-commits
mailing list