[Mlir-commits] [mlir] [MLIR][GPU-LLVM] Add in-pass signature update option for opencl kernels (PR #105664)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Mon Aug 26 04:55:25 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mlir-gpu

Author: Petr Kurapov (kurapov-peter)

<details>
<summary>Changes</summary>

This PR introduces a new option `force-opencl-address-spaces` to the `gpu-to-llvm-spv` conversion pass. It addresses a problem with signature incompatibility between `gpu.launch_func` and the kernel arguments when using a unified shared memory with OpenCL runtime. The discrepancy comes from the fact that the global address space is defined as `1` in OpenCL spec.

Note: this is a draft showing an alternative solution to the one proposed in https://github.com/llvm/llvm-project/pull/102925. Currently, it lacks the differentiation between kernels and non-kernel functions (generic address space should be attached to the latter one's arguments). I'll add it if we decide the approach is acceptable.

Edit: after some discussions, the conversion to generic address spaces seem to be unnecessary. The assumption is that if there was a function that required specific handling (e.g., should accept pointers from shared space) it is user's responsibility to put the correct attribute on it. I'm leaving the conversion to only replace None with Global for now until (and if) we find any problems with it.

---
Full diff: https://github.com/llvm/llvm-project/pull/105664.diff


3 Files Affected:

- (modified) mlir/include/mlir/Conversion/Passes.td (+3) 
- (modified) mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp (+58) 
- (modified) mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir (+14) 


``````````diff
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 7bde9e490e4f4e..05f07421b8f526 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -542,6 +542,9 @@ def ConvertGpuOpsToLLVMSPVOps : Pass<"convert-gpu-to-llvm-spv", "gpu::GPUModuleO
     Option<"indexBitwidth", "index-bitwidth", "unsigned",
            /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
            "Bitwidth of the index type, 0 to use size of machine word">,
+    Option<"forceOpenclAddressSpaces", "force-opencl-address-spaces",
+           "bool", /*default=*/"false",
+           "Force kernel argument pointers to have address space global.">,
   ];
 }
 
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index ced4236402923a..a9c6e76b506e61 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -34,6 +34,8 @@
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/FormatVariadic.h"
 
+#define DEBUG_TYPE "gpu-to-llvm-spv"
+
 using namespace mlir;
 
 namespace mlir {
@@ -306,6 +308,48 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
   }
 };
 
+class MemorySpaceToOpenCLMemorySpaceConverter final : public TypeConverter {
+public:
+  MemorySpaceToOpenCLMemorySpaceConverter() {
+    addConversion([](Type t) { return t; });
+    addConversion(
+        [this](BaseMemRefType memRefType) -> std::optional<Type> {
+          std::optional<gpu::AddressSpace> addrSpace =
+              memorySpaceMap(memRefType.getMemorySpace());
+          if (!addrSpace) {
+            LLVM_DEBUG(
+                llvm::dbgs()
+                << "cannot convert " << memRefType
+                << " due to being unable to find address space in the map\n");
+            return std::nullopt;
+          }
+          auto addrSpaceAttr =
+              gpu::AddressSpaceAttr::get(memRefType.getContext(), *addrSpace);
+          if (auto rankedType = dyn_cast<MemRefType>(memRefType)) {
+            return MemRefType::get(memRefType.getShape(),
+                                   memRefType.getElementType(),
+                                   rankedType.getLayout(), addrSpaceAttr);
+          }
+          return UnrankedMemRefType::get(memRefType.getElementType(),
+                                         addrSpaceAttr);
+        });
+    addConversion([this](FunctionType type) {
+      auto inputs = llvm::map_to_vector(
+          type.getInputs(), [this](Type ty) { return convertType(ty); });
+      auto results = llvm::map_to_vector(
+          type.getResults(), [this](Type ty) { return convertType(ty); });
+      return FunctionType::get(type.getContext(), inputs, results);
+    });
+  }
+
+private:
+  std::optional<gpu::AddressSpace> memorySpaceMap(Attribute memSpaceAttr) {
+    if (!memSpaceAttr)
+      return gpu::AddressSpace::Global;
+    return std::nullopt;
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // GPU To LLVM-SPV Pass.
 //===----------------------------------------------------------------------===//
@@ -325,6 +369,20 @@ struct GPUToLLVMSPVConversionPass final
     LLVMTypeConverter converter(context, options);
     LLVMConversionTarget target(*context);
 
+    if (forceOpenclAddressSpaces) {
+      MemorySpaceToOpenCLMemorySpaceConverter converter;
+      AttrTypeReplacer replacer;
+      replacer.addReplacement([&converter](BaseMemRefType origType)
+                                  -> std::optional<BaseMemRefType> {
+        return converter.convertType<BaseMemRefType>(origType);
+      });
+
+      replacer.recursivelyReplaceElementsIn(getOperation(),
+                                            /*replaceAttrs=*/true,
+                                            /*replaceLocs=*/false,
+                                            /*replaceTypes=*/true);
+    }
+
     target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp,
                         gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
                         gpu::ReturnOp, gpu::ShuffleOp, gpu::ThreadIdOp>();
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index ec4f4a304d5073..d100f36ae42521 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -2,6 +2,8 @@
 // RUN: | FileCheck --check-prefixes=CHECK-64,CHECK %s
 // RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(convert-gpu-to-llvm-spv{index-bitwidth=32}))" -split-input-file -verify-diagnostics %s \
 // RUN: | FileCheck --check-prefixes=CHECK-32,CHECK %s
+// RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(convert-gpu-to-llvm-spv{force-opencl-address-spaces}))" -split-input-file -verify-diagnostics %s \
+// RUN: | FileCheck --check-prefixes=OPENCL %s
 
 gpu.module @builtins {
   // CHECK-64:        llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64 attributes {
@@ -515,3 +517,15 @@ gpu.module @kernels {
     gpu.return
   }
 }
+
+// -----
+
+gpu.module @kernels {
+// OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces(
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+  gpu.func @no_address_spaces(%arg0: memref<f32>, %arg1: memref<f32, #gpu.address_space<global>>, %arg2: memref<f32>) {
+    gpu.return
+  }
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/105664


More information about the Mlir-commits mailing list