[Mlir-commits] [mlir] f849640 - [MLIR] Make the ROCM integration tests runnable
Krzysztof Drewniak
llvmlistbot at llvm.org
Fri Nov 19 09:09:58 PST 2021
Author: Krzysztof Drewniak
Date: 2021-11-19T17:09:53Z
New Revision: f849640a0c6c628da398d4d390f6695d318e976d
URL: https://github.com/llvm/llvm-project/commit/f849640a0c6c628da398d4d390f6695d318e976d
DIFF: https://github.com/llvm/llvm-project/commit/f849640a0c6c628da398d4d390f6695d318e976d.diff
LOG: [MLIR] Make the ROCM integration tests runnable
- Move the #defines from the GPU Ops library to the GPU Transforms library so
that SerializeToHsaco is non-trivially compiled
- Add required includes to SerializeToHsaco
- Move MCSubtargetInfo creation to the correct point in the
compilation process
- Update the MLIR in the ROCM tests to account for renamed/moved ops
Differential Revision: https://reviews.llvm.org/D114184
Added:
Modified:
mlir/lib/Dialect/GPU/CMakeLists.txt
mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
mlir/lib/ExecutionEngine/CMakeLists.txt
mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir
mlir/test/Integration/GPU/ROCM/two-modules.mlir
mlir/test/Integration/GPU/ROCM/vecadd.mlir
mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index fee795f60567e..5fe79070737c4 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -145,14 +145,14 @@ if(MLIR_ENABLE_ROCM_RUNNER)
message(STATUS "ROCm HIP version: ${HIP_VERSION}")
endif()
- target_compile_definitions(obj.MLIRGPUOps
+ target_compile_definitions(obj.MLIRGPUTransforms
PRIVATE
__HIP_PLATFORM_HCC__
__ROCM_PATH__="${ROCM_PATH}"
MLIR_GPU_TO_HSACO_PASS_ENABLE=1
)
- target_include_directories(obj.MLIRGPUOps
+ target_include_directories(obj.MLIRGPUTransforms
PRIVATE
${MLIR_SOURCE_DIR}/../lld/include
${HIP_PATH}/include
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
index ab81ffac39c2a..29edd535e3cf2 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/GPU/Passes.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/MLIRContext.h"
#if MLIR_GPU_TO_HSACO_PASS_ENABLE
#include "mlir/Pass/Pass.h"
@@ -32,8 +34,11 @@
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
+
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "lld/Common/Driver.h"
@@ -170,8 +175,11 @@ SerializeToHsacoPass::assembleIsa(const std::string &isa) {
std::unique_ptr<llvm::MCAsmInfo> mai(
target->createMCAsmInfo(*mri, this->triple, mcOptions));
mai->setRelaxELFRelocations(true);
+ std::unique_ptr<llvm::MCSubtargetInfo> sti(
+ target->createMCSubtargetInfo(this->triple, this->chip, this->features));
- llvm::MCContext ctx(triple, mai.get(), mri.get(), &srcMgr, &mcOptions);
+ llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
+ &mcOptions);
std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
ctx.setObjectFileInfo(mofi.get());
@@ -182,8 +190,6 @@ SerializeToHsacoPass::assembleIsa(const std::string &isa) {
std::unique_ptr<llvm::MCStreamer> mcStreamer;
std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
- std::unique_ptr<llvm::MCSubtargetInfo> sti(
- target->createMCSubtargetInfo(this->triple, this->chip, this->features));
llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx);
llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt
index 0e8d184c85a0e..c52837c2e9ad9 100644
--- a/mlir/lib/ExecutionEngine/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/CMakeLists.txt
@@ -202,8 +202,11 @@ if(MLIR_ENABLE_ROCM_RUNNER)
${HIP_PATH}/include
${ROCM_PATH}/include
)
+ set_property(TARGET mlir_rocm_runtime
+ PROPERTY INSTALL_RPATH_USE_LINK_PATH ON)
+
target_link_libraries(mlir_rocm_runtime
- PRIVATE
+ PUBLIC
${ROCM_RUNTIME_LIBRARY}
)
endif()
diff --git a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir
index fef4e04cb4f32..90ddad5c29eff 100644
--- a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir
+++ b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir
@@ -11,10 +11,10 @@
func @other_func(%arg0 : f32, %arg1 : memref<?xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
- %block_dim = dim %arg1, %c0 : memref<?xf32>
+ %block_dim = memref.dim %arg1, %c0 : memref<?xf32>
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
- store %arg0, %arg1[%tx] : memref<?xf32>
+ memref.store %arg0, %arg1[%tx] : memref<?xf32>
gpu.terminator
}
return
@@ -22,12 +22,12 @@ func @other_func(%arg0 : f32, %arg1 : memref<?xf32>) {
// CHECK: [1, 1, 1, 1, 1]
func @main() {
- %arg0 = alloc() : memref<5xf32>
+ %arg0 = memref.alloc() : memref<5xf32>
%21 = arith.constant 5 : i32
- %22 = memref_cast %arg0 : memref<5xf32> to memref<?xf32>
- %cast = memref_cast %22 : memref<?xf32> to memref<*xf32>
+ %22 = memref.cast %arg0 : memref<5xf32> to memref<?xf32>
+ %cast = memref.cast %22 : memref<?xf32> to memref<*xf32>
gpu.host_register %cast : memref<*xf32>
- %23 = memref_cast %22 : memref<?xf32> to memref<*xf32>
+ %23 = memref.cast %22 : memref<?xf32> to memref<*xf32>
call @print_memref_f32(%23) : (memref<*xf32>) -> ()
%24 = arith.constant 1.0 : f32
%25 = call @mgpuMemGetDeviceMemRef1dFloat(%22) : (memref<?xf32>) -> (memref<?xf32>)
diff --git a/mlir/test/Integration/GPU/ROCM/two-modules.mlir b/mlir/test/Integration/GPU/ROCM/two-modules.mlir
index c865172bfa0f6..5a383cae663b9 100644
--- a/mlir/test/Integration/GPU/ROCM/two-modules.mlir
+++ b/mlir/test/Integration/GPU/ROCM/two-modules.mlir
@@ -10,24 +10,24 @@
// CHECK: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
func @main() {
- %arg = alloc() : memref<13xi32>
- %dst = memref_cast %arg : memref<13xi32> to memref<?xi32>
+ %arg = memref.alloc() : memref<13xi32>
+ %dst = memref.cast %arg : memref<13xi32> to memref<?xi32>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
- %sx = dim %dst, %c0 : memref<?xi32>
- %cast_dst = memref_cast %dst : memref<?xi32> to memref<*xi32>
+ %sx = memref.dim %dst, %c0 : memref<?xi32>
+ %cast_dst = memref.cast %dst : memref<?xi32> to memref<*xi32>
gpu.host_register %cast_dst : memref<*xi32>
%dst_device = call @mgpuMemGetDeviceMemRef1dInt32(%dst) : (memref<?xi32>) -> (memref<?xi32>)
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) {
%t0 = arith.index_cast %tx : index to i32
- store %t0, %dst_device[%tx] : memref<?xi32>
+ memref.store %t0, %dst_device[%tx] : memref<?xi32>
gpu.terminator
}
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %c1, %block_z = %c1) {
%t0 = arith.index_cast %tx : index to i32
- store %t0, %dst_device[%tx] : memref<?xi32>
+ memref.store %t0, %dst_device[%tx] : memref<?xi32>
gpu.terminator
}
call @print_memref_i32(%cast_dst) : (memref<*xi32>) -> ()
diff --git a/mlir/test/Integration/GPU/ROCM/vecadd.mlir b/mlir/test/Integration/GPU/ROCM/vecadd.mlir
index 1feaf34ea8f40..5546035d59bdb 100644
--- a/mlir/test/Integration/GPU/ROCM/vecadd.mlir
+++ b/mlir/test/Integration/GPU/ROCM/vecadd.mlir
@@ -12,13 +12,13 @@
func @vecadd(%arg0 : memref<?xf32>, %arg1 : memref<?xf32>, %arg2 : memref<?xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
- %block_dim = dim %arg0, %c0 : memref<?xf32>
+ %block_dim = memref.dim %arg0, %c0 : memref<?xf32>
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
threads(%tx, %ty, %tz) in (%block_x = %block_dim, %block_y = %c1, %block_z = %c1) {
- %a = load %arg0[%tx] : memref<?xf32>
- %b = load %arg1[%tx] : memref<?xf32>
+ %a = memref.load %arg0[%tx] : memref<?xf32>
+ %b = memref.load %arg1[%tx] : memref<?xf32>
%c = arith.addf %a, %b : f32
- store %c, %arg2[%tx] : memref<?xf32>
+ memref.store %c, %arg2[%tx] : memref<?xf32>
gpu.terminator
}
return
@@ -30,19 +30,19 @@ func @main() {
%c1 = arith.constant 1 : index
%c5 = arith.constant 5 : index
%cf1dot23 = arith.constant 1.23 : f32
- %0 = alloc() : memref<5xf32>
- %1 = alloc() : memref<5xf32>
- %2 = alloc() : memref<5xf32>
- %3 = memref_cast %0 : memref<5xf32> to memref<?xf32>
- %4 = memref_cast %1 : memref<5xf32> to memref<?xf32>
- %5 = memref_cast %2 : memref<5xf32> to memref<?xf32>
+ %0 = memref.alloc() : memref<5xf32>
+ %1 = memref.alloc() : memref<5xf32>
+ %2 = memref.alloc() : memref<5xf32>
+ %3 = memref.cast %0 : memref<5xf32> to memref<?xf32>
+ %4 = memref.cast %1 : memref<5xf32> to memref<?xf32>
+ %5 = memref.cast %2 : memref<5xf32> to memref<?xf32>
scf.for %i = %c0 to %c5 step %c1 {
- store %cf1dot23, %3[%i] : memref<?xf32>
- store %cf1dot23, %4[%i] : memref<?xf32>
+ memref.store %cf1dot23, %3[%i] : memref<?xf32>
+ memref.store %cf1dot23, %4[%i] : memref<?xf32>
}
- %6 = memref_cast %3 : memref<?xf32> to memref<*xf32>
- %7 = memref_cast %4 : memref<?xf32> to memref<*xf32>
- %8 = memref_cast %5 : memref<?xf32> to memref<*xf32>
+ %6 = memref.cast %3 : memref<?xf32> to memref<*xf32>
+ %7 = memref.cast %4 : memref<?xf32> to memref<*xf32>
+ %8 = memref.cast %5 : memref<?xf32> to memref<*xf32>
gpu.host_register %6 : memref<*xf32>
gpu.host_register %7 : memref<*xf32>
gpu.host_register %8 : memref<*xf32>
diff --git a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
index c65b790df56c7..db358f43dd9c8 100644
--- a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
+++ b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
@@ -59,19 +59,19 @@ func @main() {
%cf1 = arith.constant 1.0 : f32
%cf1dot23 = arith.constant 1.23 : f32
- %arg0 = alloc() : memref<4xf32>
- %arg1 = alloc() : memref<4xf32>
+ %arg0 = memref.alloc() : memref<4xf32>
+ %arg1 = memref.alloc() : memref<4xf32>
- %22 = memref_cast %arg0 : memref<4xf32> to memref<?xf32>
- %23 = memref_cast %arg1 : memref<4xf32> to memref<?xf32>
+ %22 = memref.cast %arg0 : memref<4xf32> to memref<?xf32>
+ %23 = memref.cast %arg1 : memref<4xf32> to memref<?xf32>
scf.for %i = %c0 to %c4 step %c1 {
- store %cf1dot23, %22[%i] : memref<?xf32>
- store %cf1dot23, %23[%i] : memref<?xf32>
+ memref.store %cf1dot23, %22[%i] : memref<?xf32>
+ memref.store %cf1dot23, %23[%i] : memref<?xf32>
}
- %cast0 = memref_cast %22 : memref<?xf32> to memref<*xf32>
- %cast1 = memref_cast %23 : memref<?xf32> to memref<*xf32>
+ %cast0 = memref.cast %22 : memref<?xf32> to memref<*xf32>
+ %cast1 = memref.cast %23 : memref<?xf32> to memref<*xf32>
gpu.host_register %cast0 : memref<*xf32>
gpu.host_register %cast1 : memref<*xf32>
More information about the Mlir-commits
mailing list