[Mlir-commits] [mlir] 69040d5 - [MLIR] Allow for multiple gpu modules during translation.
Stephan Herhut
llvmlistbot at llvm.org
Thu Apr 16 05:18:51 PDT 2020
Author: Stephan Herhut
Date: 2020-04-16T14:18:31+02:00
New Revision: 69040d5b0bfa59edacc2ad10d517b4270bf76845
URL: https://github.com/llvm/llvm-project/commit/69040d5b0bfa59edacc2ad10d517b4270bf76845
DIFF: https://github.com/llvm/llvm-project/commit/69040d5b0bfa59edacc2ad10d517b4270bf76845.diff
LOG: [MLIR] Allow for multiple gpu modules during translation.
This change makes the ModuleTranslation thread-safe by locking on the
LLVMContext. Furthermore, we now clone the LLVM module into a new
context when compiling to PTX, similar to what the ORC JIT does.
Differential Revision: https://reviews.llvm.org/D78207
Added:
mlir/test/mlir-cuda-runner/two-modules.mlir
Modified:
mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
mlir/lib/Dialect/LLVMIR/CMakeLists.txt
mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
mlir/lib/ExecutionEngine/CMakeLists.txt
mlir/lib/ExecutionEngine/ExecutionEngine.cpp
mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
index b33b38971249..c081a3df29cb 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
@@ -32,6 +32,10 @@
namespace llvm {
class Type;
class LLVMContext;
+namespace sys {
+template <bool mt_only>
+class SmartMutex;
+} // end namespace sys
} // end namespace llvm
namespace mlir {
@@ -216,6 +220,12 @@ Value createGlobalString(Location loc, OpBuilder &builder, StringRef name,
/// function confirms that the Operation has the desired properties.
bool satisfiesLLVMModule(Operation *op);
+/// Clones the given module into the provided context. This is implemented by
+/// transforming the module into bitcode and then reparsing the bitcode in the
+/// provided context.
+std::unique_ptr<llvm::Module>
+cloneModuleIntoNewContext(llvm::LLVMContext *context, llvm::Module *module);
+
} // end namespace LLVM
} // end namespace mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
index 20ed573ab8bd..48eecb4eed87 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
@@ -24,6 +24,7 @@ def LLVM_Dialect : Dialect {
~LLVMDialect();
llvm::LLVMContext &getLLVMContext();
llvm::Module &getLLVMModule();
+ llvm::sys::SmartMutex<true> &getLLVMContextMutex();
private:
friend LLVMType;
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index 7ba4a7d21adf..e7223bf7349a 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -106,7 +106,6 @@ class ModuleTranslation {
/// Original and translated module.
Operation *mlirModule;
std::unique_ptr<llvm::Module> llvmModule;
-
/// A converter for translating debug information.
std::unique_ptr<detail::DebugTranslation> debugTranslation;
@@ -114,6 +113,8 @@ class ModuleTranslation {
std::unique_ptr<llvm::OpenMPIRBuilder> ompBuilder;
/// Precomputed pointer to OpenMP dialect.
const Dialect *ompDialect;
+ /// Pointer to the llvmDialect;
+ LLVMDialect *llvmDialect;
/// Mappings between llvm.mlir.global definitions and corresponding globals.
DenseMap<Operation *, llvm::GlobalValue *> globalsMapping;
diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
index 38820f174d98..7cdb0dda4454 100644
--- a/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
+++ b/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
@@ -15,6 +15,7 @@
#include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
@@ -98,12 +99,19 @@ std::string GpuKernelToCubinPass::translateModuleToPtx(
llvm::Module &module, llvm::TargetMachine &target_machine) {
std::string ptx;
{
+ // Clone the llvm module into a new context to enable concurrent compilation
+ // with multiple threads.
+ // TODO(zinenko): Reevaluate model of ownership of LLVMContext in
+ // LLVMDialect.
+ llvm::LLVMContext llvmContext;
+ auto clone = LLVM::cloneModuleIntoNewContext(&llvmContext, &module);
+
llvm::raw_string_ostream stream(ptx);
llvm::buffer_ostream pstream(stream);
llvm::legacy::PassManager codegen_passes;
target_machine.addPassesToEmitFile(codegen_passes, pstream, nullptr,
llvm::CGFT_AssemblyFile);
- codegen_passes.run(module);
+ codegen_passes.run(*clone);
}
return ptx;
diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
index 134ca5d6c6e7..bdd9bb66f617 100644
--- a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
+++ b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
@@ -116,8 +116,8 @@ class GpuLaunchFuncToCudaCallsPass
void addParamToList(OpBuilder &builder, Location loc, Value param, Value list,
unsigned pos, Value one);
Value setupParamsArray(gpu::LaunchFuncOp launchOp, OpBuilder &builder);
- Value generateKernelNameConstant(StringRef name, Location loc,
- OpBuilder &builder);
+ Value generateKernelNameConstant(StringRef moduleName, StringRef name,
+ Location loc, OpBuilder &builder);
void translateGpuLaunchCalls(mlir::gpu::LaunchFuncOp launchOp);
public:
@@ -345,12 +345,13 @@ Value GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp,
// %2 = llvm.getelementptr %0[%1, %1] : !llvm<"i8*">
// }
Value GpuLaunchFuncToCudaCallsPass::generateKernelNameConstant(
- StringRef name, Location loc, OpBuilder &builder) {
+ StringRef moduleName, StringRef name, Location loc, OpBuilder &builder) {
// Make sure the trailing zero is included in the constant.
std::vector<char> kernelName(name.begin(), name.end());
kernelName.push_back('\0');
- std::string globalName = std::string(llvm::formatv("{0}_kernel_name", name));
+ std::string globalName =
+ std::string(llvm::formatv("{0}_{1}_kernel_name", moduleName, name));
return LLVM::createGlobalString(
loc, builder, globalName, StringRef(kernelName.data(), kernelName.size()),
LLVM::Linkage::Internal, llvmDialect);
@@ -415,7 +416,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
// the kernel function.
auto cuOwningModuleRef =
builder.create<LLVM::LoadOp>(loc, getPointerType(), cuModule);
- auto kernelName = generateKernelNameConstant(launchOp.kernel(), loc, builder);
+ auto kernelName = generateKernelNameConstant(launchOp.getKernelModuleName(),
+ launchOp.kernel(), loc, builder);
auto cuFunction = allocatePointer(builder, loc);
auto cuModuleGetFunction =
getOperation().lookupSymbol<LLVM::LLVMFuncOp>(cuModuleGetFunctionName);
diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
index 148bc4bef3e8..833438a70cb9 100644
--- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
@@ -13,6 +13,8 @@ add_mlir_dialect_library(MLIRLLVMIR
target_link_libraries(MLIRLLVMIR
PUBLIC
LLVMAsmParser
+ LLVMBitReader
+ LLVMBitWriter
LLVMCore
LLVMSupport
LLVMFrontendOpenMP
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index 9ad878006e09..7ce591de3802 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -20,6 +20,8 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/AsmParser/Parser.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
@@ -1682,6 +1684,9 @@ LLVMDialect::~LLVMDialect() {}
llvm::LLVMContext &LLVMDialect::getLLVMContext() { return impl->llvmContext; }
llvm::Module &LLVMDialect::getLLVMModule() { return impl->module; }
+llvm::sys::SmartMutex<true> &LLVMDialect::getLLVMContextMutex() {
+ return impl->mutex;
+}
/// Parse a type registered to this dialect.
Type LLVMDialect::parseType(DialectAsmParser &parser) const {
@@ -1971,3 +1976,16 @@ bool mlir::LLVM::satisfiesLLVMModule(Operation *op) {
return op->hasTrait<OpTrait::SymbolTable>() &&
op->hasTrait<OpTrait::IsIsolatedFromAbove>();
}
+
+std::unique_ptr<llvm::Module>
+mlir::LLVM::cloneModuleIntoNewContext(llvm::LLVMContext *context,
+ llvm::Module *module) {
+ SmallVector<char, 1> buffer;
+ {
+ llvm::raw_svector_ostream os(buffer);
+ WriteBitcodeToFile(*module, os);
+ }
+ llvm::MemoryBufferRef bufferRef(StringRef(buffer.data(), buffer.size()),
+ "cloned module buffer");
+ return cantFail(parseBitcodeFile(bufferRef, *context));
+}
diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt
index a30f987dbe98..df3268a49d59 100644
--- a/mlir/lib/ExecutionEngine/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/CMakeLists.txt
@@ -17,8 +17,6 @@ target_link_libraries(MLIRExecutionEngine
PUBLIC
MLIRLLVMIR
MLIRTargetLLVMIR
- LLVMBitReader
- LLVMBitWriter
LLVMExecutionEngine
LLVMObject
LLVMOrcJIT
diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
index 2314dba09f59..25bd45f15885 100644
--- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
#include "mlir/ExecutionEngine/ExecutionEngine.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Module.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Target/LLVMIR.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
@@ -211,17 +210,8 @@ Expected<std::unique_ptr<ExecutionEngine>> ExecutionEngine::create(
// Clone module in a new LLVMContext since translateModuleToLLVMIR buries
// ownership too deeply.
// TODO(zinenko): Reevaluate model of ownership of LLVMContext in LLVMDialect.
- SmallVector<char, 1> buffer;
- {
- llvm::raw_svector_ostream os(buffer);
- WriteBitcodeToFile(*llvmModule, os);
- }
- llvm::MemoryBufferRef bufferRef(StringRef(buffer.data(), buffer.size()),
- "cloned module buffer");
- auto expectedModule = parseBitcodeFile(bufferRef, *ctx);
- if (!expectedModule)
- return expectedModule.takeError();
- std::unique_ptr<Module> deserModule = std::move(*expectedModule);
+ std::unique_ptr<Module> deserModule =
+ LLVM::cloneModuleIntoNewContext(ctx.get(), llvmModule.get());
auto dataLayout = deserModule->getDataLayout();
// Callback to create the object layer with symbol resolution to current
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 78458e88ed8d..4cfa05ee33a3 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -301,7 +301,8 @@ ModuleTranslation::ModuleTranslation(Operation *module,
debugTranslation(
std::make_unique<DebugTranslation>(module, *this->llvmModule)),
ompDialect(
- module->getContext()->getRegisteredDialect<omp::OpenMPDialect>()) {
+ module->getContext()->getRegisteredDialect<omp::OpenMPDialect>()),
+ llvmDialect(module->getContext()->getRegisteredDialect<LLVMDialect>()) {
assert(satisfiesLLVMModule(mlirModule) &&
"mlirModule should honor LLVM's module semantics.");
}
@@ -495,6 +496,9 @@ LogicalResult ModuleTranslation::convertBlock(Block &bb, bool ignoreArguments) {
/// Create named global variables that correspond to llvm.mlir.global
/// definitions.
LogicalResult ModuleTranslation::convertGlobals() {
+ // Lock access to the llvm context.
+ llvm::sys::SmartScopedLock<true> scopedLock(
+ llvmDialect->getLLVMContextMutex());
for (auto op : getModuleBody(mlirModule).getOps<LLVM::GlobalOp>()) {
llvm::Type *type = op.getType().getUnderlyingType();
llvm::Constant *cst = llvm::UndefValue::get(type);
@@ -754,6 +758,9 @@ LogicalResult ModuleTranslation::checkSupportedModuleOps(Operation *m) {
}
LogicalResult ModuleTranslation::convertFunctions() {
+ // Lock access to the llvm context.
+ llvm::sys::SmartScopedLock<true> scopedLock(
+ llvmDialect->getLLVMContextMutex());
// Declare all functions first because there may be function calls that form a
// call graph with cycles.
for (auto function : getModuleBody(mlirModule).getOps<LLVMFuncOp>()) {
@@ -798,6 +805,8 @@ std::unique_ptr<llvm::Module>
ModuleTranslation::prepareLLVMModule(Operation *m) {
auto *dialect = m->getContext()->getRegisteredDialect<LLVM::LLVMDialect>();
assert(dialect && "LLVM dialect must be registered");
+ // Lock the LLVM context as we might create new types here.
+ llvm::sys::SmartScopedLock<true> scopedLock(dialect->getLLVMContextMutex());
auto llvmModule = llvm::CloneModule(dialect->getLLVMModule());
if (!llvmModule)
diff --git a/mlir/test/mlir-cuda-runner/two-modules.mlir b/mlir/test/mlir-cuda-runner/two-modules.mlir
new file mode 100644
index 000000000000..0f01b36f5cee
--- /dev/null
+++ b/mlir/test/mlir-cuda-runner/two-modules.mlir
@@ -0,0 +1,28 @@
+// RUN: mlir-cuda-runner %s --print-ir-after-all --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s --dump-input=always
+
+// CHECK: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+func @main() {
+ %arg = alloc() : memref<13xi32>
+ %dst = memref_cast %arg : memref<13xi32> to memref<?xi32>
+ %one = constant 1 : index
+ %sx = dim %dst, 0 : memref<?xi32>
+ call @mcuMemHostRegisterMemRef1dInt32(%dst) : (memref<?xi32>) -> ()
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
+ threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
+ %t0 = index_cast %tx : index to i32
+ store %t0, %dst[%tx] : memref<?xi32>
+ gpu.terminator
+ }
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
+ threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
+ %t0 = index_cast %tx : index to i32
+ store %t0, %dst[%tx] : memref<?xi32>
+ gpu.terminator
+ }
+ %U = memref_cast %dst : memref<?xi32> to memref<*xi32>
+ call @print_memref_i32(%U) : (memref<*xi32>) -> ()
+ return
+}
+
+func @mcuMemHostRegisterMemRef1dInt32(%ptr : memref<?xi32>)
+func @print_memref_i32(%ptr : memref<*xi32>)
More information about the Mlir-commits mailing list