[Mlir-commits] [mlir] f8b7a65 - [MLIR][GPU-LLVM] Add in-pass signature update for opencl kernels (#105664)
llvmlistbot at llvm.org
Thu Oct 10 05:04:56 PDT 2024
Author: Petr Kurapov
Date: 2024-10-10T14:04:52+02:00
New Revision: f8b7a65395a07073feff367145965214d95ba99a
URL: https://github.com/llvm/llvm-project/commit/f8b7a65395a07073feff367145965214d95ba99a
DIFF: https://github.com/llvm/llvm-project/commit/f8b7a65395a07073feff367145965214d95ba99a.diff
LOG: [MLIR][GPU-LLVM] Add in-pass signature update for opencl kernels (#105664)
Default to Global address space for memrefs that do not have an explicit address space set in the IR.
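For illustration, a rough before/after sketch drawn from the new tests added below: a memref function argument or call operand that carries no memory-space attribute is now rewritten in-pass to the OpenCL global (CrossWorkgroup) address space, i.e. numeric space 1, so the corresponding kernel pointer arguments lower to !llvm.ptr<1>.

    // Input (no explicit address space on the memrefs):
    gpu.func @no_address_spaces_complex(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) kernel {
      func.call @no_address_spaces_callee(%arg0, %arg1) : (memref<2x2xf32>, memref<4xf32>) -> ()
      gpu.return
    }

    // After the in-pass signature update, the types default to the global space:
    //   func.call @no_address_spaces_callee(%{{.*}}, %{{.*}}) : (memref<2x2xf32, 1>, memref<4xf32, 1>) -> ()
    // and the kernel arguments lower to !llvm.ptr<1>.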
---------
Co-authored-by: Victor Perez <victor.perez at intel.com>
Co-authored-by: Jakub Kuderski <kubakuderski at gmail.com>
Co-authored-by: Victor Perez <victor.perez at codeplay.com>
Added:
Modified:
mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 544f1f4a4f6a79..bb6a38c0e76edf 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -34,6 +34,8 @@
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/FormatVariadic.h"
+#define DEBUG_TYPE "gpu-to-llvm-spv"
+
using namespace mlir;
namespace mlir {
@@ -316,6 +318,38 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
}
};
+class MemorySpaceToOpenCLMemorySpaceConverter final : public TypeConverter {
+public:
+ MemorySpaceToOpenCLMemorySpaceConverter(MLIRContext *ctx) {
+ addConversion([](Type t) { return t; });
+ addConversion([ctx](BaseMemRefType memRefType) -> std::optional<Type> {
+ // Attach global addr space attribute to memrefs with no addr space attr
+ Attribute memSpaceAttr = memRefType.getMemorySpace();
+ if (memSpaceAttr)
+ return std::nullopt;
+
+ unsigned globalAddrspace = storageClassToAddressSpace(
+ spirv::ClientAPI::OpenCL, spirv::StorageClass::CrossWorkgroup);
+ Attribute addrSpaceAttr =
+ IntegerAttr::get(IntegerType::get(ctx, 64), globalAddrspace);
+ if (auto rankedType = dyn_cast<MemRefType>(memRefType)) {
+ return MemRefType::get(memRefType.getShape(),
+ memRefType.getElementType(),
+ rankedType.getLayout(), addrSpaceAttr);
+ }
+ return UnrankedMemRefType::get(memRefType.getElementType(),
+ addrSpaceAttr);
+ });
+ addConversion([this](FunctionType type) {
+ auto inputs = llvm::map_to_vector(
+ type.getInputs(), [this](Type ty) { return convertType(ty); });
+ auto results = llvm::map_to_vector(
+ type.getResults(), [this](Type ty) { return convertType(ty); });
+ return FunctionType::get(type.getContext(), inputs, results);
+ });
+ }
+};
+
//===----------------------------------------------------------------------===//
// Subgroup query ops.
//===----------------------------------------------------------------------===//
@@ -382,6 +416,21 @@ struct GPUToLLVMSPVConversionPass final
LLVMTypeConverter converter(context, options);
LLVMConversionTarget target(*context);
+ // Force OpenCL address spaces when they are not present
+ {
+ MemorySpaceToOpenCLMemorySpaceConverter converter(context);
+ AttrTypeReplacer replacer;
+ replacer.addReplacement([&converter](BaseMemRefType origType)
+ -> std::optional<BaseMemRefType> {
+ return converter.convertType<BaseMemRefType>(origType);
+ });
+
+ replacer.recursivelyReplaceElementsIn(getOperation(),
+ /*replaceAttrs=*/true,
+ /*replaceLocs=*/false,
+ /*replaceTypes=*/true);
+ }
+
target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp,
gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
gpu::LaneIdOp, gpu::NumSubgroupsOp, gpu::ReturnOp,
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 910105ddf69586..c143d030ed362b 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -444,20 +444,20 @@ gpu.module @kernels {
gpu.return
}
- // CHECK-64: llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i64, %{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64) attributes {gpu.kernel} {
- // CHECK-32: llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i32, %{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32) attributes {gpu.kernel} {
+ // CHECK-64: llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i64, %{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i64) attributes {gpu.kernel} {
+ // CHECK-32: llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i32, %{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i32) attributes {gpu.kernel} {
gpu.func @kernel_with_conv_args(%arg0: index, %arg1: memref<index>) kernel {
gpu.return
}
- // CHECK-64: llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
- // CHECK-32: llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
+ // CHECK-64: llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
+ // CHECK-32: llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
gpu.func @kernel_with_sized_memref(%arg0: memref<1xindex>) kernel {
gpu.return
}
- // CHECK-64: llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
- // CHECK-32: llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
+ // CHECK-64: llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
+ // CHECK-32: llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
gpu.func @kernel_with_ND_memref(%arg0: memref<128x128x128xindex>) kernel {
gpu.return
}
@@ -566,6 +566,44 @@ gpu.module @kernels {
// -----
+gpu.module @kernels {
+// CHECK: llvm.func spir_funccc @_Z12get_group_idj(i32)
+// CHECK-LABEL: llvm.func spir_funccc @no_address_spaces(
+// CHECK-SAME: %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-SAME: %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-SAME: %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+ gpu.func @no_address_spaces(%arg0: memref<f32>, %arg1: memref<f32, #gpu.address_space<global>>, %arg2: memref<f32>) {
+ gpu.return
+ }
+
+// CHECK-LABEL: llvm.func spir_kernelcc @no_address_spaces_complex(
+// CHECK-SAME: %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-SAME: %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK: func.call @no_address_spaces_callee(%{{[0-9]+}}, %{{[0-9]+}})
+// CHECK-SAME: : (memref<2x2xf32, 1>, memref<4xf32, 1>)
+ gpu.func @no_address_spaces_complex(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) kernel {
+ func.call @no_address_spaces_callee(%arg0, %arg1) : (memref<2x2xf32>, memref<4xf32>) -> ()
+ gpu.return
+ }
+// CHECK-LABEL: func.func @no_address_spaces_callee(
+// CHECK-SAME: [[ARG0:%.*]]: memref<2x2xf32, 1>
+// CHECK-SAME: [[ARG1:%.*]]: memref<4xf32, 1>
+// CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32
+// CHECK: [[I0:%.*]] = llvm.call spir_funccc @_Z12get_group_idj([[C0]]) {
+// CHECK-32: [[I1:%.*]] = builtin.unrealized_conversion_cast [[I0]] : i32 to index
+// CHECK-64: [[I1:%.*]] = builtin.unrealized_conversion_cast [[I0]] : i64 to index
+// CHECK: [[LD:%.*]] = memref.load [[ARG0]]{{\[}}[[I1]], [[I1]]{{\]}} : memref<2x2xf32, 1>
+// CHECK: memref.store [[LD]], [[ARG1]]{{\[}}[[I1]]{{\]}} : memref<4xf32, 1>
+ func.func @no_address_spaces_callee(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) {
+ %block_id = gpu.block_id x
+ %0 = memref.load %arg0[%block_id, %block_id] : memref<2x2xf32>
+ memref.store %0, %arg1[%block_id] : memref<4xf32>
+ func.return
+ }
+}
+
+// -----
+
// Lowering of subgroup query operations
// CHECK-DAG: llvm.func spir_funccc @_Z18get_sub_group_size() -> i32 attributes {no_unwind, will_return}