[Mlir-commits] [mlir] [MLIR][GPU-LLVM] Add in-pass signature update option for opencl kernels (PR #105664)

Thu Aug 22 07:13:55 PDT 2024

https://github.com/kurapov-peter created https://github.com/llvm/llvm-project/pull/105664

This PR introduces a new option `force-opencl-address-spaces` to the `convert-gpu-to-llvm-spv` conversion pass. It addresses a signature incompatibility between `gpu.launch_func` and the kernel arguments when using unified shared memory with the OpenCL runtime. The discrepancy comes from the fact that the global address space is defined as `1` in the OpenCL spec.

Note: this is a draft showing an alternative solution to the one proposed in https://github.com/llvm/llvm-project/pull/102925. Currently, it does not differentiate between kernel and non-kernel functions (the generic address space should be attached to the arguments of the latter). I'll add that if we decide the approach is acceptable.

>From 2e8465cc42eb3cec6b4b94e5a01fc8bae0fb62c0 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Mon, 19 Aug 2024 14:03:44 +0000
Subject: [PATCH] [MLIR][GPU-LLVM] Add in-pass signature update option for
 opencl kernels

---
 mlir/include/mlir/Conversion/Passes.td        |  3 +
 .../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp  | 59 +++++++++++++++++++
 .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir         | 14 +++++
 3 files changed, 76 insertions(+)

diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 7bde9e490e4f4e..05f07421b8f526 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -542,6 +542,9 @@ def ConvertGpuOpsToLLVMSPVOps : Pass<"convert-gpu-to-llvm-spv", "gpu::GPUModuleO
     Option<"indexBitwidth", "index-bitwidth", "unsigned",
            /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
            "Bitwidth of the index type, 0 to use size of machine word">,
+    Option<"forceOpenclAddressSpaces", "force-opencl-address-spaces",
+           "bool", /*default=*/"false",
+           "Force kernel argument pointers to have address space global.">,
   ];
 }
 
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index ced4236402923a..25cf6560257978 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -306,6 +306,51 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
   }
 };
 
+class MemorySpaceToOpenCLMemorySpaceConverter : public TypeConverter {
+public:
+  explicit MemorySpaceToOpenCLMemorySpaceConverter() {
+    addConversion([](Type t) { return t; });
+    addConversion(
+        [this](BaseMemRefType memRefType) -> std::optional<Type> {
+          std::optional<gpu::AddressSpace> addrSpace =
+              memorySpaceMap(memRefType.getMemorySpace());
+          if (!addrSpace) {
+            LLVM_DEBUG(
+                llvm::dbgs()
+                << "cannot convert " << memRefType
+                << " due to being unable to find address space in the map\n");
+            return std::nullopt;
+          }
+          auto addrSpaceAttr =
+              gpu::AddressSpaceAttr::get(memRefType.getContext(), *addrSpace);
+          if (auto rankedType = dyn_cast<MemRefType>(memRefType)) {
+            return MemRefType::get(memRefType.getShape(),
+                                   memRefType.getElementType(),
+                                   rankedType.getLayout(), addrSpaceAttr);
+          }
+          return UnrankedMemRefType::get(memRefType.getElementType(),
+                                         addrSpaceAttr);
+        });
+    addConversion([this](FunctionType type) {
+      auto inputs = llvm::map_to_vector(
+          type.getInputs(), [this](Type ty) { return convertType(ty); });
+      auto results = llvm::map_to_vector(
+          type.getResults(), [this](Type ty) { return convertType(ty); });
+      return FunctionType::get(type.getContext(), inputs, results);
+    });
+  }
+
+private:
+  std::optional<gpu::AddressSpace> memorySpaceMap(Attribute memSpaceAttr) {
+    if (!memSpaceAttr)
+      return gpu::AddressSpace::Global;
+    auto gpuAddrSpace = dyn_cast<gpu::AddressSpaceAttr>(memSpaceAttr);
+    if (!gpuAddrSpace)
+      return std::nullopt;
+    return gpuAddrSpace.getValue();
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // GPU To LLVM-SPV Pass.
 //===----------------------------------------------------------------------===//
@@ -325,6 +370,20 @@ struct GPUToLLVMSPVConversionPass final
     LLVMTypeConverter converter(context, options);
     LLVMConversionTarget target(*context);
 
+    if (forceOpenclAddressSpaces) {
+      MemorySpaceToOpenCLMemorySpaceConverter converter;
+      AttrTypeReplacer replacer;
+      replacer.addReplacement([&converter](BaseMemRefType origType)
+                                  -> std::optional<BaseMemRefType> {
+        return converter.convertType<BaseMemRefType>(origType);
+      });
+
+      replacer.recursivelyReplaceElementsIn(getOperation(),
+                                            /*replaceAttrs=*/true,
+                                            /*replaceLocs=*/false,
+                                            /*replaceTypes=*/true);
+    }
+
     target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp,
                         gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
                         gpu::ReturnOp, gpu::ShuffleOp, gpu::ThreadIdOp>();
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index ec4f4a304d5073..d100f36ae42521 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -2,6 +2,8 @@
 // RUN: | FileCheck --check-prefixes=CHECK-64,CHECK %s
 // RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(convert-gpu-to-llvm-spv{index-bitwidth=32}))" -split-input-file -verify-diagnostics %s \
 // RUN: | FileCheck --check-prefixes=CHECK-32,CHECK %s
+// RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(convert-gpu-to-llvm-spv{force-opencl-address-spaces}))" -split-input-file -verify-diagnostics %s \
+// RUN: | FileCheck --check-prefixes=OPENCL %s
 
 gpu.module @builtins {
   // CHECK-64:        llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64 attributes {
@@ -515,3 +517,15 @@ gpu.module @kernels {
     gpu.return
   }
 }
+
+// -----
+
+gpu.module @kernels {
+// OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces(
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+  gpu.func @no_address_spaces(%arg0: memref<f32>, %arg1: memref<f32, #gpu.address_space<global>>, %arg2: memref<f32>) {
+    gpu.return
+  }
+}