[flang-commits] [mlir] [lldb] [llvm] [clang] [flang] [libc] [compiler-rt] [libcxx] [clang-tools-extra] [lld] [MLIR] Enable GPU Dialect to SYCL runtime integration (PR #71430)
Sang Ik Lee via flang-commits
flang-commits at lists.llvm.org
Tue Nov 14 11:42:23 PST 2023
https://github.com/silee2 updated https://github.com/llvm/llvm-project/pull/71430
>From c76403cf8629b8f7d8a5b7a3ee5da2881713a7f8 Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Mon, 6 Nov 2023 18:47:23 +0000
Subject: [PATCH 1/5] [MLIR] Enable GPU Dialect to SYCL runtime integration
GPU Dialect lowering to SYCL runtime is driven by spirv.target_env
attached to gpu.module. As a result of this, spirv.target_env remains
as an input to LLVMIR Translation.
A SPIRVToLLVMIRTranslation that only registers the SPIR-V dialect, without
performing any actual translation, is added to avoid an unregistered-dialect
error in mlir-cpu-runner.
SelectObjectAttr.cpp is updated to
1) Pass binary size argument to getModuleLoadFn
2) Pass parameter count to getKernelLaunchFn
This change does not impact CUDA and ROCM usage since both
mlir_cuda_runtime and mlir_rocm_runtime are already updated to
accept and ignore the extra arguments.
---
mlir/include/mlir/Target/LLVMIR/Dialect/All.h | 3 ++
.../Dialect/SPIRV/SPIRVToLLVMIRTranslation.h | 31 +++++++++++
mlir/lib/Target/LLVMIR/CMakeLists.txt | 1 +
mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt | 1 +
.../LLVMIR/Dialect/GPU/SelectObjectAttr.cpp | 50 +++++++++++++----
.../LLVMIR/Dialect/SPIRV/CMakeLists.txt | 13 +++++
.../SPIRV/SPIRVToLLVMIRTranslation.cpp | 31 +++++++++++
mlir/test/CMakeLists.txt | 4 ++
.../Integration/GPU/SYCL/gpu-to-spirv.mlir | 54 +++++++++++++++++++
mlir/test/Integration/GPU/SYCL/lit.local.cfg | 2 +
mlir/test/Target/LLVMIR/gpu.mlir | 9 ++--
mlir/test/lit.cfg.py | 3 ++
mlir/test/lit.site.cfg.py.in | 1 +
13 files changed, 188 insertions(+), 15 deletions(-)
create mode 100644 mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h
create mode 100644 mlir/lib/Target/LLVMIR/Dialect/SPIRV/CMakeLists.txt
create mode 100644 mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp
create mode 100644 mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir
create mode 100644 mlir/test/Integration/GPU/SYCL/lit.local.cfg
diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/All.h b/mlir/include/mlir/Target/LLVMIR/Dialect/All.h
index 0563b9bf3d475a4..5dfc15afb75931a 100644
--- a/mlir/include/mlir/Target/LLVMIR/Dialect/All.h
+++ b/mlir/include/mlir/Target/LLVMIR/Dialect/All.h
@@ -26,6 +26,7 @@
#include "mlir/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/X86Vector/X86VectorToLLVMIRTranslation.h"
namespace mlir {
@@ -45,6 +46,7 @@ static inline void registerAllToLLVMIRTranslations(DialectRegistry &registry) {
registerOpenACCDialectTranslation(registry);
registerOpenMPDialectTranslation(registry);
registerROCDLDialectTranslation(registry);
+ registerSPIRVDialectTranslation(registry);
registerX86VectorDialectTranslation(registry);
// Extension required for translating GPU offloading Ops.
@@ -61,6 +63,7 @@ registerAllGPUToLLVMIRTranslations(DialectRegistry &registry) {
registerLLVMDialectTranslation(registry);
registerNVVMDialectTranslation(registry);
registerROCDLDialectTranslation(registry);
+ registerSPIRVDialectTranslation(registry);
// Extension required for translating GPU offloading Ops.
gpu::registerOffloadingLLVMTranslationInterfaceExternalModels(registry);
diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h
new file mode 100644
index 000000000000000..e9580a10b4ca780
--- /dev/null
+++ b/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h
@@ -0,0 +1,31 @@
+//===- SPIRVToLLVMIRTranslation.h - SPIRV to LLVM IR ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides registration calls for SPIRV dialect to LLVM IR translation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_LLVMIR_DIALECT_SPIRV_SPIRVTOLLVMIRTRANSLATION_H
+#define MLIR_TARGET_LLVMIR_DIALECT_SPIRV_SPIRVTOLLVMIRTRANSLATION_H
+
+namespace mlir {
+
+class DialectRegistry;
+class MLIRContext;
+
+/// Register the SPIRV dialect and the translation from it to the LLVM IR in the
+/// given registry;
+void registerSPIRVDialectTranslation(DialectRegistry &registry);
+
+/// Register the SPIRV dialect and the translation from it in the registry
+/// associated with the given context.
+void registerSPIRVDialectTranslation(MLIRContext &context);
+
+} // namespace mlir
+
+#endif // MLIR_TARGET_LLVMIR_DIALECT_SPIRV_SPIRVTOLLVMIRTRANSLATION_H
diff --git a/mlir/lib/Target/LLVMIR/CMakeLists.txt b/mlir/lib/Target/LLVMIR/CMakeLists.txt
index 5db0885d70d6e7a..531c15a8703e948 100644
--- a/mlir/lib/Target/LLVMIR/CMakeLists.txt
+++ b/mlir/lib/Target/LLVMIR/CMakeLists.txt
@@ -58,6 +58,7 @@ add_mlir_translation_library(MLIRToLLVMIRTranslationRegistration
MLIROpenACCToLLVMIRTranslation
MLIROpenMPToLLVMIRTranslation
MLIRROCDLToLLVMIRTranslation
+ MLIRSPIRVToLLVMIRTranslation
)
add_mlir_translation_library(MLIRTargetLLVMIRImport
diff --git a/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt b/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt
index fb0e5cd0649f636..c9d916d8a5d82d1 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt
+++ b/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt
@@ -9,4 +9,5 @@ add_subdirectory(NVVM)
add_subdirectory(OpenACC)
add_subdirectory(OpenMP)
add_subdirectory(ROCDL)
+add_subdirectory(SPIRV)
add_subdirectory(X86Vector)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
index 47fe6973778cd7f..6ea0dac89a42c18 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
@@ -52,6 +52,10 @@ class SelectObjectAttrImpl
std::string getBinaryIdentifier(StringRef binaryName) {
return binaryName.str() + "_bin_cst";
}
+// Returns an identifier for the global int64 holding the binary size.
+std::string getBinarySizeIdentifier(StringRef binaryName) {
+ return binaryName.str() + "_bin_size_cst";
+}
} // namespace
void mlir::gpu::registerOffloadingLLVMTranslationInterfaceExternalModels(
@@ -124,6 +128,17 @@ LogicalResult SelectObjectAttrImpl::embedBinary(
serializedObj->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage);
serializedObj->setAlignment(llvm::MaybeAlign(8));
serializedObj->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
+
+ // Embed the object size as a global constant.
+ llvm::Constant *binarySize =
+ llvm::ConstantInt::get(builder.getInt64Ty(), object.getObject().size());
+ llvm::GlobalVariable *serializedSize = new llvm::GlobalVariable(
+ *module, binarySize->getType(), true,
+ llvm::GlobalValue::LinkageTypes::InternalLinkage, binarySize,
+ getBinarySizeIdentifier(op.getName()));
+ serializedSize->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage);
+ serializedSize->setAlignment(llvm::MaybeAlign(8));
+ serializedSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
return success();
}
@@ -172,6 +187,7 @@ class LaunchKernel {
IRBuilderBase &builder;
mlir::LLVM::ModuleTranslation &moduleTranslation;
Type *i32Ty{};
+ Type *i64Ty{};
Type *voidTy{};
Type *intPtrTy{};
PointerType *ptrTy{};
@@ -213,6 +229,7 @@ llvm::LaunchKernel::LaunchKernel(
mlir::LLVM::ModuleTranslation &moduleTranslation)
: module(module), builder(builder), moduleTranslation(moduleTranslation) {
i32Ty = builder.getInt32Ty();
+ i64Ty = builder.getInt64Ty();
ptrTy = builder.getPtrTy(0);
voidTy = builder.getVoidTy();
intPtrTy = builder.getIntPtrTy(module.getDataLayout());
@@ -221,11 +238,11 @@ llvm::LaunchKernel::LaunchKernel(
llvm::FunctionCallee llvm::LaunchKernel::getKernelLaunchFn() {
return module.getOrInsertFunction(
"mgpuLaunchKernel",
- FunctionType::get(
- voidTy,
- ArrayRef<Type *>({ptrTy, intPtrTy, intPtrTy, intPtrTy, intPtrTy,
- intPtrTy, intPtrTy, i32Ty, ptrTy, ptrTy, ptrTy}),
- false));
+ FunctionType::get(voidTy,
+ ArrayRef<Type *>({ptrTy, intPtrTy, intPtrTy, intPtrTy,
+ intPtrTy, intPtrTy, intPtrTy, i32Ty,
+ ptrTy, ptrTy, ptrTy, i64Ty}),
+ false));
}
llvm::FunctionCallee llvm::LaunchKernel::getModuleFunctionFn() {
@@ -237,7 +254,7 @@ llvm::FunctionCallee llvm::LaunchKernel::getModuleFunctionFn() {
llvm::FunctionCallee llvm::LaunchKernel::getModuleLoadFn() {
return module.getOrInsertFunction(
"mgpuModuleLoad",
- FunctionType::get(ptrTy, ArrayRef<Type *>({ptrTy}), false));
+ FunctionType::get(ptrTy, ArrayRef<Type *>({ptrTy, i64Ty}), false));
}
llvm::FunctionCallee llvm::LaunchKernel::getModuleLoadJITFn() {
@@ -377,10 +394,21 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op,
if (!binary)
return op.emitError() << "Couldn't find the binary: " << binaryIdentifier;
+ llvm::Constant *paramsCount =
+ llvm::ConstantInt::get(i64Ty, op.getNumKernelOperands());
+
+ std::string binarySizeIdentifier = getBinarySizeIdentifier(moduleName);
+ Value *binarySizeVar = module.getGlobalVariable(binarySizeIdentifier, true);
+ if (!binarySizeVar)
+ return op.emitError() << "Couldn't find the binary size: "
+ << binarySizeIdentifier;
+ Value *binarySize =
+ dyn_cast<llvm::GlobalVariable>(binarySizeVar)->getInitializer();
+
Value *moduleObject =
object.getFormat() == gpu::CompilationTarget::Assembly
? builder.CreateCall(getModuleLoadJITFn(), {binary, optV})
- : builder.CreateCall(getModuleLoadFn(), {binary});
+ : builder.CreateCall(getModuleLoadFn(), {binary, binarySize});
// Load the kernel function.
Value *moduleFunction = builder.CreateCall(
@@ -401,10 +429,10 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op,
// Create the launch call.
Value *nullPtr = ConstantPointerNull::get(ptrTy);
- builder.CreateCall(
- getKernelLaunchFn(),
- ArrayRef<Value *>({moduleFunction, gx, gy, gz, bx, by, bz,
- dynamicMemorySize, stream, argArray, nullPtr}));
+ builder.CreateCall(getKernelLaunchFn(),
+ ArrayRef<Value *>({moduleFunction, gx, gy, gz, bx, by, bz,
+ dynamicMemorySize, stream, argArray,
+ nullPtr, paramsCount}));
// Sync & destroy the stream, for synchronous launches.
if (handleStream) {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/SPIRV/CMakeLists.txt b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/CMakeLists.txt
new file mode 100644
index 000000000000000..850b95b8ddc77a0
--- /dev/null
+++ b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/CMakeLists.txt
@@ -0,0 +1,13 @@
+add_mlir_translation_library(MLIRSPIRVToLLVMIRTranslation
+ SPIRVToLLVMIRTranslation.cpp
+
+ LINK_COMPONENTS
+ Core
+
+ LINK_LIBS PUBLIC
+ MLIRIR
+ MLIRLLVMDialect
+ MLIRSPIRVDialect
+ MLIRSupport
+ MLIRTargetLLVMIRExport
+ )
diff --git a/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp
new file mode 100644
index 000000000000000..06038a17f2ef666
--- /dev/null
+++ b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp
@@ -0,0 +1,31 @@
+//===- SPIRVToLLVMIRTranslation.cpp - Translate SPIRV to LLVM IR ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a translation between the MLIR SPIRV dialect and
+// LLVM IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h"
+#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Target/LLVMIR/ModuleTranslation.h"
+
+using namespace mlir;
+using namespace mlir::LLVM;
+
+void mlir::registerSPIRVDialectTranslation(DialectRegistry &registry) {
+ registry.insert<spirv::SPIRVDialect>();
+}
+
+void mlir::registerSPIRVDialectTranslation(MLIRContext &context) {
+ DialectRegistry registry;
+ registerSPIRVDialectTranslation(registry);
+ context.appendDialectRegistry(registry);
+}
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
index d81f3c4b1e20c5a..c26826d1b2c62fe 100644
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -139,6 +139,10 @@ if(MLIR_ENABLE_ROCM_RUNNER)
list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime)
endif()
+if(MLIR_ENABLE_SYCL_RUNNER)
+ list(APPEND MLIR_TEST_DEPENDS mlir_sycl_runtime)
+endif()
+
list(APPEND MLIR_TEST_DEPENDS MLIRUnitTests)
if(LLVM_BUILD_EXAMPLES)
diff --git a/mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir
new file mode 100644
index 000000000000000..bc6f3cea080df20
--- /dev/null
+++ b/mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir
@@ -0,0 +1,54 @@
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
+// RUN: | mlir-cpu-runner \
+// RUN: --shared-libs=%mlir_sycl_runtime \
+// RUN: --shared-libs=%mlir_runner_utils \
+// RUN: --entry-point-result=void \
+// RUN: | FileCheck %s
+
+module @add attributes {gpu.container_module} {
+ memref.global "private" constant @__constant_3x3xi64_0 : memref<3x3xi64> = dense<[[1, 4098, 3], [16777220, 5, 4294967302], [7, 1099511627784, 9]]>
+ memref.global "private" constant @__constant_3x3xi64 : memref<3x3xi64> = dense<[[1, 2, 3], [4, 5, 4102], [16777223, 4294967304, 1099511627785]]>
+ func.func @main() {
+ %0 = memref.get_global @__constant_3x3xi64 : memref<3x3xi64>
+ %1 = memref.get_global @__constant_3x3xi64_0 : memref<3x3xi64>
+ %2 = call @test(%0, %1) : (memref<3x3xi64>, memref<3x3xi64>) -> memref<3x3xi64>
+ %cast = memref.cast %2 : memref<3x3xi64> to memref<*xi64>
+ call @printMemrefI64(%cast) : (memref<*xi64>) -> ()
+ return
+ }
+ func.func private @printMemrefI64(memref<*xi64>)
+ func.func @test(%arg0: memref<3x3xi64>, %arg1: memref<3x3xi64>) -> memref<3x3xi64> {
+ %c3 = arith.constant 3 : index
+ %c1 = arith.constant 1 : index
+ %mem = gpu.alloc host_shared () : memref<3x3xi64>
+ memref.copy %arg1, %mem : memref<3x3xi64> to memref<3x3xi64>
+ %memref_0 = gpu.alloc host_shared () : memref<3x3xi64>
+ memref.copy %arg0, %memref_0 : memref<3x3xi64> to memref<3x3xi64>
+ %memref_2 = gpu.alloc host_shared () : memref<3x3xi64>
+ %2 = gpu.wait async
+ %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c3, %c3, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<3x3xi64>, %mem : memref<3x3xi64>, %memref_2 : memref<3x3xi64>)
+ gpu.wait [%3]
+ %alloc = memref.alloc() : memref<3x3xi64>
+ memref.copy %memref_2, %alloc : memref<3x3xi64> to memref<3x3xi64>
+ %4 = gpu.wait async
+ %5 = gpu.dealloc async [%4] %memref_2 : memref<3x3xi64>
+ %6 = gpu.dealloc async [%5] %memref_0 : memref<3x3xi64>
+ %7 = gpu.dealloc async [%6] %mem : memref<3x3xi64>
+ gpu.wait [%7]
+ return %alloc : memref<3x3xi64>
+ }
+ gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
+ gpu.func @test_kernel(%arg0: memref<3x3xi64>, %arg1: memref<3x3xi64>, %arg2: memref<3x3xi64>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 3, 3, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+ %0 = gpu.block_id x
+ %1 = gpu.block_id y
+ %2 = memref.load %arg0[%0, %1] : memref<3x3xi64>
+ %3 = memref.load %arg1[%0, %1] : memref<3x3xi64>
+ %4 = arith.addi %2, %3 : i64
+ memref.store %4, %arg2[%0, %1] : memref<3x3xi64>
+ gpu.return
+ }
+ }
+ // CHECK: [2, 4100, 6],
+ // CHECK: [16777224, 10, 4294971404],
+ // CHECK: [16777230, 1103806595088, 1099511627794]
+}
diff --git a/mlir/test/Integration/GPU/SYCL/lit.local.cfg b/mlir/test/Integration/GPU/SYCL/lit.local.cfg
new file mode 100644
index 000000000000000..75bac1882eed5c9
--- /dev/null
+++ b/mlir/test/Integration/GPU/SYCL/lit.local.cfg
@@ -0,0 +1,2 @@
+if not config.enable_sycl_runner:
+ config.unsupported = True
diff --git a/mlir/test/Target/LLVMIR/gpu.mlir b/mlir/test/Target/LLVMIR/gpu.mlir
index fddbbee962c1aee..8a3fc13e0b9af71 100644
--- a/mlir/test/Target/LLVMIR/gpu.mlir
+++ b/mlir/test/Target/LLVMIR/gpu.mlir
@@ -4,6 +4,7 @@
module attributes {gpu.container_module} {
// CHECK: [[ARGS_TY:%.*]] = type { i32, i32 }
// CHECK: @kernel_module_bin_cst = internal constant [4 x i8] c"BLOB", align 8
+ // CHECK: @kernel_module_bin_size_cst = internal constant i64 4, align 8
// CHECK: @kernel_module_kernel_kernel_name = private unnamed_addr constant [7 x i8] c"kernel\00", align 1
gpu.binary @kernel_module [#gpu.object<#nvvm.target, "BLOB">]
llvm.func @foo() {
@@ -17,10 +18,10 @@ module attributes {gpu.container_module} {
// CHECK: store i32 32, ptr [[ARG1]], align 4
// CHECK: %{{.*}} = getelementptr ptr, ptr [[ARGS_ARRAY]], i32 1
// CHECK: store ptr [[ARG1]], ptr %{{.*}}, align 8
- // CHECK: [[MODULE:%.*]] = call ptr @mgpuModuleLoad(ptr @kernel_module_bin_cst)
+ // CHECK: [[MODULE:%.*]] = call ptr @mgpuModuleLoad(ptr @kernel_module_bin_cst, i64 4)
// CHECK: [[FUNC:%.*]] = call ptr @mgpuModuleGetFunction(ptr [[MODULE]], ptr @kernel_module_kernel_kernel_name)
// CHECK: [[STREAM:%.*]] = call ptr @mgpuStreamCreate()
- // CHECK: call void @mgpuLaunchKernel(ptr [[FUNC]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 256, ptr [[STREAM]], ptr [[ARGS_ARRAY]], ptr null)
+ // CHECK: call void @mgpuLaunchKernel(ptr [[FUNC]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 256, ptr [[STREAM]], ptr [[ARGS_ARRAY]], ptr null, i64 2)
// CHECK: call void @mgpuStreamSynchronize(ptr [[STREAM]])
// CHECK: call void @mgpuStreamDestroy(ptr [[STREAM]])
// CHECK: call void @mgpuModuleUnload(ptr [[MODULE]])
@@ -59,9 +60,9 @@ module attributes {gpu.container_module} {
// CHECK: = call ptr @mgpuStreamCreate()
// CHECK-NEXT: = alloca {{.*}}, align 8
// CHECK-NEXT: [[ARGS:%.*]] = alloca ptr, i64 0, align 8
- // CHECK-NEXT: [[MODULE:%.*]] = call ptr @mgpuModuleLoad(ptr @kernel_module_bin_cst)
+ // CHECK-NEXT: [[MODULE:%.*]] = call ptr @mgpuModuleLoad(ptr @kernel_module_bin_cst, i64 4)
// CHECK-NEXT: [[FUNC:%.*]] = call ptr @mgpuModuleGetFunction(ptr [[MODULE]], ptr @kernel_module_kernel_kernel_name)
- // CHECK-NEXT: call void @mgpuLaunchKernel(ptr [[FUNC]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 0, ptr {{.*}}, ptr [[ARGS]], ptr null)
+ // CHECK-NEXT: call void @mgpuLaunchKernel(ptr [[FUNC]], i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i32 0, ptr {{.*}}, ptr [[ARGS]], ptr null, i64 0)
// CHECK-NEXT: call void @mgpuModuleUnload(ptr [[MODULE]])
// CHECK-NEXT: call void @mgpuStreamSynchronize(ptr %{{.*}})
// CHECK-NEXT: call void @mgpuStreamDestroy(ptr %{{.*}})
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index da8488373862c36..cb8e1ab9d8a4ca8 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -126,6 +126,9 @@ def add_runtime(name):
if config.enable_cuda_runner:
tools.extend([add_runtime("mlir_cuda_runtime")])
+if config.enable_sycl_runner:
+ tools.extend([add_runtime("mlir_sycl_runtime")])
+
# The following tools are optional
tools.extend(
[
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 2de40ba5e8e57e6..c994de0d3d16b7e 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -31,6 +31,7 @@ config.run_rocm_tests = @MLIR_ENABLE_ROCM_CONVERSIONS@
config.enable_rocm_runner = @MLIR_ENABLE_ROCM_RUNNER@
config.gpu_compilation_format = "@MLIR_GPU_COMPILATION_TEST_FORMAT@"
config.rocm_test_chipset = "@ROCM_TEST_CHIPSET@"
+config.enable_sycl_runner = @MLIR_ENABLE_SYCL_RUNNER@
config.enable_spirv_cpu_runner = @MLIR_ENABLE_SPIRV_CPU_RUNNER@
config.enable_vulkan_runner = @MLIR_ENABLE_VULKAN_RUNNER@
config.enable_bindings_python = @MLIR_ENABLE_BINDINGS_PYTHON@
>From 50c621ebb8c18b131bf2d124337e008ffede80bc Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Mon, 13 Nov 2023 19:49:55 +0000
Subject: [PATCH 2/5] Address reviewer comments.
---
.../Dialect/SPIRV/SPIRVToLLVMIRTranslation.h | 10 +++----
.../LLVMIR/Dialect/GPU/SelectObjectAttr.cpp | 28 ++++---------------
.../SPIRV/SPIRVToLLVMIRTranslation.cpp | 4 +--
3 files changed, 13 insertions(+), 29 deletions(-)
diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h
index e9580a10b4ca780..2b066a528deb58f 100644
--- a/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.h
@@ -1,4 +1,4 @@
-//===- SPIRVToLLVMIRTranslation.h - SPIRV to LLVM IR ------------*- C++ -*-===//
+//===- SPIRVToLLVMIRTranslation.h - SPIR-V to LLVM IR -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This provides registration calls for SPIRV dialect to LLVM IR translation.
+// This provides registration calls for SPIR-V dialect to LLVM IR translation.
//
//===----------------------------------------------------------------------===//
@@ -18,11 +18,11 @@ namespace mlir {
class DialectRegistry;
class MLIRContext;
-/// Register the SPIRV dialect and the translation from it to the LLVM IR in the
-/// given registry;
+/// Register the SPIR-V dialect and the translation from it to the LLVM IR in
+/// the given registry;
void registerSPIRVDialectTranslation(DialectRegistry &registry);
-/// Register the SPIRV dialect and the translation from it in the registry
+/// Register the SPIR-V dialect and the translation from it in the registry
/// associated with the given context.
void registerSPIRVDialectTranslation(MLIRContext &context);
diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
index 6ea0dac89a42c18..54947c16f5c561f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
@@ -52,10 +52,6 @@ class SelectObjectAttrImpl
std::string getBinaryIdentifier(StringRef binaryName) {
return binaryName.str() + "_bin_cst";
}
-// Returns an identifier for the global int64 holding the binary size.
-std::string getBinarySizeIdentifier(StringRef binaryName) {
- return binaryName.str() + "_bin_size_cst";
-}
} // namespace
void mlir::gpu::registerOffloadingLLVMTranslationInterfaceExternalModels(
@@ -128,17 +124,6 @@ LogicalResult SelectObjectAttrImpl::embedBinary(
serializedObj->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage);
serializedObj->setAlignment(llvm::MaybeAlign(8));
serializedObj->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
-
- // Embed the object size as a global constant.
- llvm::Constant *binarySize =
- llvm::ConstantInt::get(builder.getInt64Ty(), object.getObject().size());
- llvm::GlobalVariable *serializedSize = new llvm::GlobalVariable(
- *module, binarySize->getType(), true,
- llvm::GlobalValue::LinkageTypes::InternalLinkage, binarySize,
- getBinarySizeIdentifier(op.getName()));
- serializedSize->setLinkage(llvm::GlobalValue::LinkageTypes::InternalLinkage);
- serializedSize->setAlignment(llvm::MaybeAlign(8));
- serializedSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
return success();
}
@@ -397,13 +382,12 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op,
llvm::Constant *paramsCount =
llvm::ConstantInt::get(i64Ty, op.getNumKernelOperands());
- std::string binarySizeIdentifier = getBinarySizeIdentifier(moduleName);
- Value *binarySizeVar = module.getGlobalVariable(binarySizeIdentifier, true);
- if (!binarySizeVar)
- return op.emitError() << "Couldn't find the binary size: "
- << binarySizeIdentifier;
- Value *binarySize =
- dyn_cast<llvm::GlobalVariable>(binarySizeVar)->getInitializer();
+ auto binaryVar = dyn_cast<llvm::GlobalVariable>(binary);
+ llvm::Constant *binaryInit = binaryVar->getInitializer();
+ auto binaryDataSeq = dyn_cast<llvm::ConstantDataSequential>(binaryInit);
+ llvm::Constant *binarySize =
+ llvm::ConstantInt::get(i64Ty, binaryDataSeq->getNumElements() *
+ binaryDataSeq->getElementByteSize());
Value *moduleObject =
object.getFormat() == gpu::CompilationTarget::Assembly
diff --git a/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp
index 06038a17f2ef666..638edca5efde86f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/SPIRV/SPIRVToLLVMIRTranslation.cpp
@@ -1,4 +1,4 @@
-//===- SPIRVToLLVMIRTranslation.cpp - Translate SPIRV to LLVM IR ----------===//
+//===- SPIRVToLLVMIRTranslation.cpp - Translate SPIR-V to LLVM IR ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements a translation between the MLIR SPIRV dialect and
+// This file implements a translation between the MLIR SPIR-V dialect and
// LLVM IR.
//
//===----------------------------------------------------------------------===//
>From b4068d5eb4cf874879182eba78b9bfa2854e4ec4 Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Mon, 13 Nov 2023 23:08:03 +0000
Subject: [PATCH 3/5] Add more integration tests.
---
.../GPU/SYCL/gpu-addf32-to-spirv.mlir | 56 +++++++++++++
...to-spirv.mlir => gpu-addi64-to-spirv.mlir} | 0
.../GPU/SYCL/gpu-reluf32-to-spirv.mlir | 79 +++++++++++++++++++
3 files changed, 135 insertions(+)
create mode 100644 mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir
rename mlir/test/Integration/GPU/SYCL/{gpu-to-spirv.mlir => gpu-addi64-to-spirv.mlir} (100%)
create mode 100644 mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir
diff --git a/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir
new file mode 100644
index 000000000000000..113a49425de5445
--- /dev/null
+++ b/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir
@@ -0,0 +1,56 @@
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
+// RUN: | mlir-cpu-runner \
+// RUN: --shared-libs=%mlir_sycl_runtime \
+// RUN: --shared-libs=%mlir_runner_utils \
+// RUN: --entry-point-result=void \
+// RUN: | FileCheck %s
+
+module @add attributes {gpu.container_module} {
+ memref.global "private" constant @__constant_2x2x2xf32_0 : memref<2x2x2xf32> = dense<[[[1.1, 2.2], [3.3, 4.4]], [[5.5, 6.6], [7.7, 8.8 ]]]>
+ memref.global "private" constant @__constant_2x2x2xf32 : memref<2x2x2xf32> = dense<[[[1.2, 2.3], [4.5, 5.8]], [[7.2, 8.3], [10.5, 11.8]]]>
+ func.func @main() {
+ %0 = memref.get_global @__constant_2x2x2xf32 : memref<2x2x2xf32>
+ %1 = memref.get_global @__constant_2x2x2xf32_0 : memref<2x2x2xf32>
+ %2 = call @test(%0, %1) : (memref<2x2x2xf32>, memref<2x2x2xf32>) -> memref<2x2x2xf32>
+ %cast = memref.cast %2 : memref<2x2x2xf32> to memref<*xf32>
+ call @printMemrefF32(%cast) : (memref<*xf32>) -> ()
+ return
+ }
+ func.func private @printMemrefF32(memref<*xf32>)
+ func.func @test(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>) -> memref<2x2x2xf32> {
+ %c2 = arith.constant 2 : index
+ %c1 = arith.constant 1 : index
+ %mem = gpu.alloc host_shared () : memref<2x2x2xf32>
+ memref.copy %arg1, %mem : memref<2x2x2xf32> to memref<2x2x2xf32>
+ %memref_0 = gpu.alloc host_shared () : memref<2x2x2xf32>
+ memref.copy %arg0, %memref_0 : memref<2x2x2xf32> to memref<2x2x2xf32>
+ %memref_2 = gpu.alloc host_shared () : memref<2x2x2xf32>
+ %2 = gpu.wait async
+ %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c2, %c2, %c2) threads in (%c1, %c1, %c1) args(%memref_0 : memref<2x2x2xf32>, %mem : memref<2x2x2xf32>, %memref_2 : memref<2x2x2xf32>)
+ gpu.wait [%3]
+ %alloc = memref.alloc() : memref<2x2x2xf32>
+ memref.copy %memref_2, %alloc : memref<2x2x2xf32> to memref<2x2x2xf32>
+ %4 = gpu.wait async
+ %5 = gpu.dealloc async [%4] %memref_2 : memref<2x2x2xf32>
+ %6 = gpu.dealloc async [%5] %memref_0 : memref<2x2x2xf32>
+ %7 = gpu.dealloc async [%6] %mem : memref<2x2x2xf32>
+ gpu.wait [%7]
+ return %alloc : memref<2x2x2xf32>
+ }
+ gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
+ gpu.func @test_kernel(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>, %arg2: memref<2x2x2xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 2, 2, 2>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+ %0 = gpu.block_id x
+ %1 = gpu.block_id y
+ %2 = gpu.block_id z
+ %3 = memref.load %arg0[%0, %1, %2] : memref<2x2x2xf32>
+ %4 = memref.load %arg1[%0, %1, %2] : memref<2x2x2xf32>
+ %5 = arith.addf %3, %4 : f32
+ memref.store %5, %arg2[%0, %1, %2] : memref<2x2x2xf32>
+ gpu.return
+ }
+ }
+ // CHECK: [2.3, 4.5]
+ // CHECK: [7.8, 10.2]
+ // CHECK: [12.7, 14.9]
+ // CHECK: [18.2, 20.6]
+}
diff --git a/mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir
similarity index 100%
rename from mlir/test/Integration/GPU/SYCL/gpu-to-spirv.mlir
rename to mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir
diff --git a/mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir
new file mode 100644
index 000000000000000..162a793305e9725
--- /dev/null
+++ b/mlir/test/Integration/GPU/SYCL/gpu-reluf32-to-spirv.mlir
@@ -0,0 +1,79 @@
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
+// RUN: | mlir-cpu-runner \
+// RUN: --shared-libs=%mlir_sycl_runtime \
+// RUN: --shared-libs=%mlir_runner_utils \
+// RUN: --entry-point-result=void \
+// RUN: | FileCheck %s
+
+module @relu attributes {gpu.container_module} {
+ memref.global "private" constant @__constant_4x5xf32 : memref<4x5xf32> = dense<[
+ [-1.000000e-01, -2.000000e-01, -3.000000e-01, 4.000000e-01, 5.000000e-01],
+ [1.000000e-01, -2.000000e-01, 3.000000e-01, -4.000000e-01, 5.000000e-01],
+ [1.000000e-01, 2.000000e-01, 3.000000e-01, -4.000000e-01, -5.000000e-01],
+ [1.000000e-01, 2.000000e-01, 3.000000e-01, 4.000000e-01, 5.000000e-01]
+ ]>
+
+ func.func @main() {
+ %c1 = arith.constant 1 : index
+ %c100 = arith.constant 100 : index
+ %c0 = arith.constant 0 : index
+ %0 = memref.get_global @__constant_4x5xf32 : memref<4x5xf32>
+
+ scf.for %arg0 = %c0 to %c100 step %c1 {
+ %1 = func.call @test(%0) : (memref<4x5xf32>) -> memref<4x5xf32>
+ %cast = memref.cast %1 : memref<4x5xf32> to memref<*xf32>
+ func.call @printMemrefF32(%cast) : (memref<*xf32>) -> ()
+ // CHECK: [0, 0, 0, 0.4, 0.5],
+ // CHECK: [0.1, 0, 0.3, 0, 0.5],
+ // CHECK: [0.1, 0.2, 0.3, 0, 0],
+ // CHECK: [0.1, 0.2, 0.3, 0.4, 0.5]
+ }
+ return
+ }
+
+ func.func private @printMemrefF32(memref<*xf32>)
+ func.func @test(%arg0: memref<4x5xf32>) -> memref<4x5xf32> {
+ %c5 = arith.constant 5 : index
+ %c4 = arith.constant 4 : index
+ %cst = arith.constant 0.000000e+00 : f32
+ %c1 = arith.constant 1 : index
+ %memref = gpu.alloc host_shared () : memref<4x5xf32>
+ memref.copy %arg0, %memref : memref<4x5xf32> to memref<4x5xf32>
+ %memref_0 = gpu.alloc host_shared () : memref<4x5xi1>
+ %2 = gpu.wait async
+ %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<4x5xf32>, %cst : f32, %memref_0 : memref<4x5xi1>)
+ gpu.wait [%3]
+ %memref_1 = gpu.alloc host_shared () : memref<4x5xf32>
+ %4 = gpu.wait async
+ %5 = gpu.launch_func async [%4] @test_kernel_0::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<4x5xi1>, %memref : memref<4x5xf32>, %cst : f32, %memref_1 : memref<4x5xf32>)
+ gpu.wait [%5]
+ %alloc = memref.alloc() : memref<4x5xf32>
+ memref.copy %memref_1, %alloc : memref<4x5xf32> to memref<4x5xf32>
+ %6 = gpu.wait async
+ %7 = gpu.dealloc async [%6] %memref_1 : memref<4x5xf32>
+ %8 = gpu.dealloc async [%7] %memref_0 : memref<4x5xi1>
+ %9 = gpu.dealloc async [%8] %memref : memref<4x5xf32>
+ return %alloc : memref<4x5xf32>
+ }
+ gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Int8, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
+ gpu.func @test_kernel(%arg0: memref<4x5xf32>, %arg1: f32, %arg2: memref<4x5xi1>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+ %0 = gpu.block_id x
+ %1 = gpu.block_id y
+ %2 = memref.load %arg0[%0, %1] : memref<4x5xf32>
+ %3 = arith.cmpf olt, %2, %arg1 : f32
+ memref.store %3, %arg2[%0, %1] : memref<4x5xi1>
+ gpu.return
+ }
+ }
+ gpu.module @test_kernel_0 attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Int8, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
+ gpu.func @test_kernel(%arg0: memref<4x5xi1>, %arg1: memref<4x5xf32>, %arg2: f32, %arg3: memref<4x5xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 4, 5, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
+ %0 = gpu.block_id x
+ %1 = gpu.block_id y
+ %2 = memref.load %arg0[%0, %1] : memref<4x5xi1>
+ %3 = memref.load %arg1[%0, %1] : memref<4x5xf32>
+ %4 = arith.select %2, %arg2, %3 : f32
+ memref.store %4, %arg3[%0, %1] : memref<4x5xf32>
+ gpu.return
+ }
+ }
+}
>From 95559efd242d3b4b98939fcae818bb1b7b36af28 Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Mon, 13 Nov 2023 23:11:11 +0000
Subject: [PATCH 4/5] Reorder code.
---
mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
index 54947c16f5c561f..80f3d725b55db5f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
@@ -379,9 +379,6 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op,
if (!binary)
return op.emitError() << "Couldn't find the binary: " << binaryIdentifier;
- llvm::Constant *paramsCount =
- llvm::ConstantInt::get(i64Ty, op.getNumKernelOperands());
-
auto binaryVar = dyn_cast<llvm::GlobalVariable>(binary);
llvm::Constant *binaryInit = binaryVar->getInitializer();
auto binaryDataSeq = dyn_cast<llvm::ConstantDataSequential>(binaryInit);
@@ -411,6 +408,9 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op,
stream = builder.CreateCall(getStreamCreateFn(), {});
}
+ llvm::Constant *paramsCount =
+ llvm::ConstantInt::get(i64Ty, op.getNumKernelOperands());
+
// Create the launch call.
Value *nullPtr = ConstantPointerNull::get(ptrTy);
builder.CreateCall(getKernelLaunchFn(),
>From 2783bc1245f9acf6e1e510ad66ec4564ac2abb23 Mon Sep 17 00:00:00 2001
From: "Lee, Sang Ik" <sang.ik.lee at intel.com>
Date: Tue, 14 Nov 2023 19:27:56 +0000
Subject: [PATCH 5/5] Address reviewer comments.
---
.../LLVMIR/Dialect/GPU/SelectObjectAttr.cpp | 9 ++++-
.../GPU/SYCL/gpu-addf32-to-spirv.mlir | 36 +++++++++----------
.../GPU/SYCL/gpu-addi64-to-spirv.mlir | 36 +++++++++----------
mlir/test/Target/LLVMIR/gpu.mlir | 1 -
4 files changed, 44 insertions(+), 38 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
index 80f3d725b55db5f..270daea0a0737ec 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
@@ -380,8 +380,15 @@ llvm::LaunchKernel::createKernelLaunch(mlir::gpu::LaunchFuncOp op,
return op.emitError() << "Couldn't find the binary: " << binaryIdentifier;
auto binaryVar = dyn_cast<llvm::GlobalVariable>(binary);
+ if (!binaryVar)
+ return op.emitError() << "Binary is not a global variable: "
+ << binaryIdentifier;
llvm::Constant *binaryInit = binaryVar->getInitializer();
- auto binaryDataSeq = dyn_cast<llvm::ConstantDataSequential>(binaryInit);
+ auto binaryDataSeq =
+ dyn_cast_if_present<llvm::ConstantDataSequential>(binaryInit);
+ if (!binaryDataSeq)
+ return op.emitError() << "Couldn't find binary data array: "
+ << binaryIdentifier;
llvm::Constant *binarySize =
llvm::ConstantInt::get(i64Ty, binaryDataSeq->getNumElements() *
binaryDataSeq->getElementByteSize());
diff --git a/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir
index 113a49425de5445..c0e2903aee2d125 100644
--- a/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir
+++ b/mlir/test/Integration/GPU/SYCL/gpu-addf32-to-spirv.mlir
@@ -18,24 +18,24 @@ module @add attributes {gpu.container_module} {
}
func.func private @printMemrefF32(memref<*xf32>)
func.func @test(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>) -> memref<2x2x2xf32> {
- %c2 = arith.constant 2 : index
- %c1 = arith.constant 1 : index
- %mem = gpu.alloc host_shared () : memref<2x2x2xf32>
- memref.copy %arg1, %mem : memref<2x2x2xf32> to memref<2x2x2xf32>
- %memref_0 = gpu.alloc host_shared () : memref<2x2x2xf32>
- memref.copy %arg0, %memref_0 : memref<2x2x2xf32> to memref<2x2x2xf32>
- %memref_2 = gpu.alloc host_shared () : memref<2x2x2xf32>
- %2 = gpu.wait async
- %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c2, %c2, %c2) threads in (%c1, %c1, %c1) args(%memref_0 : memref<2x2x2xf32>, %mem : memref<2x2x2xf32>, %memref_2 : memref<2x2x2xf32>)
- gpu.wait [%3]
- %alloc = memref.alloc() : memref<2x2x2xf32>
- memref.copy %memref_2, %alloc : memref<2x2x2xf32> to memref<2x2x2xf32>
- %4 = gpu.wait async
- %5 = gpu.dealloc async [%4] %memref_2 : memref<2x2x2xf32>
- %6 = gpu.dealloc async [%5] %memref_0 : memref<2x2x2xf32>
- %7 = gpu.dealloc async [%6] %mem : memref<2x2x2xf32>
- gpu.wait [%7]
- return %alloc : memref<2x2x2xf32>
+ %c2 = arith.constant 2 : index
+ %c1 = arith.constant 1 : index
+ %mem = gpu.alloc host_shared () : memref<2x2x2xf32>
+ memref.copy %arg1, %mem : memref<2x2x2xf32> to memref<2x2x2xf32>
+ %memref_0 = gpu.alloc host_shared () : memref<2x2x2xf32>
+ memref.copy %arg0, %memref_0 : memref<2x2x2xf32> to memref<2x2x2xf32>
+ %memref_2 = gpu.alloc host_shared () : memref<2x2x2xf32>
+ %2 = gpu.wait async
+ %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c2, %c2, %c2) threads in (%c1, %c1, %c1) args(%memref_0 : memref<2x2x2xf32>, %mem : memref<2x2x2xf32>, %memref_2 : memref<2x2x2xf32>)
+ gpu.wait [%3]
+ %alloc = memref.alloc() : memref<2x2x2xf32>
+ memref.copy %memref_2, %alloc : memref<2x2x2xf32> to memref<2x2x2xf32>
+ %4 = gpu.wait async
+ %5 = gpu.dealloc async [%4] %memref_2 : memref<2x2x2xf32>
+ %6 = gpu.dealloc async [%5] %memref_0 : memref<2x2x2xf32>
+ %7 = gpu.dealloc async [%6] %mem : memref<2x2x2xf32>
+ gpu.wait [%7]
+ return %alloc : memref<2x2x2xf32>
}
gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
gpu.func @test_kernel(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>, %arg2: memref<2x2x2xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 2, 2, 2>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
diff --git a/mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir b/mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir
index bc6f3cea080df20..4ac1533b75d2034 100644
--- a/mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir
+++ b/mlir/test/Integration/GPU/SYCL/gpu-addi64-to-spirv.mlir
@@ -18,24 +18,24 @@ module @add attributes {gpu.container_module} {
}
func.func private @printMemrefI64(memref<*xi64>)
func.func @test(%arg0: memref<3x3xi64>, %arg1: memref<3x3xi64>) -> memref<3x3xi64> {
- %c3 = arith.constant 3 : index
- %c1 = arith.constant 1 : index
- %mem = gpu.alloc host_shared () : memref<3x3xi64>
- memref.copy %arg1, %mem : memref<3x3xi64> to memref<3x3xi64>
- %memref_0 = gpu.alloc host_shared () : memref<3x3xi64>
- memref.copy %arg0, %memref_0 : memref<3x3xi64> to memref<3x3xi64>
- %memref_2 = gpu.alloc host_shared () : memref<3x3xi64>
- %2 = gpu.wait async
- %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c3, %c3, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<3x3xi64>, %mem : memref<3x3xi64>, %memref_2 : memref<3x3xi64>)
- gpu.wait [%3]
- %alloc = memref.alloc() : memref<3x3xi64>
- memref.copy %memref_2, %alloc : memref<3x3xi64> to memref<3x3xi64>
- %4 = gpu.wait async
- %5 = gpu.dealloc async [%4] %memref_2 : memref<3x3xi64>
- %6 = gpu.dealloc async [%5] %memref_0 : memref<3x3xi64>
- %7 = gpu.dealloc async [%6] %mem : memref<3x3xi64>
- gpu.wait [%7]
- return %alloc : memref<3x3xi64>
+ %c3 = arith.constant 3 : index
+ %c1 = arith.constant 1 : index
+ %mem = gpu.alloc host_shared () : memref<3x3xi64>
+ memref.copy %arg1, %mem : memref<3x3xi64> to memref<3x3xi64>
+ %memref_0 = gpu.alloc host_shared () : memref<3x3xi64>
+ memref.copy %arg0, %memref_0 : memref<3x3xi64> to memref<3x3xi64>
+ %memref_2 = gpu.alloc host_shared () : memref<3x3xi64>
+ %2 = gpu.wait async
+ %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c3, %c3, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<3x3xi64>, %mem : memref<3x3xi64>, %memref_2 : memref<3x3xi64>)
+ gpu.wait [%3]
+ %alloc = memref.alloc() : memref<3x3xi64>
+ memref.copy %memref_2, %alloc : memref<3x3xi64> to memref<3x3xi64>
+ %4 = gpu.wait async
+ %5 = gpu.dealloc async [%4] %memref_2 : memref<3x3xi64>
+ %6 = gpu.dealloc async [%5] %memref_0 : memref<3x3xi64>
+ %7 = gpu.dealloc async [%6] %mem : memref<3x3xi64>
+ gpu.wait [%7]
+ return %alloc : memref<3x3xi64>
}
gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} {
gpu.func @test_kernel(%arg0: memref<3x3xi64>, %arg1: memref<3x3xi64>, %arg2: memref<3x3xi64>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 3, 3, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
diff --git a/mlir/test/Target/LLVMIR/gpu.mlir b/mlir/test/Target/LLVMIR/gpu.mlir
index 8a3fc13e0b9af71..7afc8a4dc7e87f0 100644
--- a/mlir/test/Target/LLVMIR/gpu.mlir
+++ b/mlir/test/Target/LLVMIR/gpu.mlir
@@ -4,7 +4,6 @@
module attributes {gpu.container_module} {
// CHECK: [[ARGS_TY:%.*]] = type { i32, i32 }
// CHECK: @kernel_module_bin_cst = internal constant [4 x i8] c"BLOB", align 8
- // CHECK: @kernel_module_bin_size_cst = internal constant i64 4, align 8
// CHECK: @kernel_module_kernel_kernel_name = private unnamed_addr constant [7 x i8] c"kernel\00", align 1
gpu.binary @kernel_module [#gpu.object<#nvvm.target, "BLOB">]
llvm.func @foo() {
More information about the flang-commits mailing list