[Mlir-commits] [mlir] [MLIR][GPU-LLVM] Add in-pass signature update option for opencl kernels (PR #105664)

Petr Kurapov llvmlistbot at llvm.org
Mon Sep 30 05:08:03 PDT 2024


https://github.com/kurapov-peter updated https://github.com/llvm/llvm-project/pull/105664

>From 2e8465cc42eb3cec6b4b94e5a01fc8bae0fb62c0 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Mon, 19 Aug 2024 14:03:44 +0000
Subject: [PATCH 01/16] [MLIR][GPU-LLVM] Add in-pass signature update option
 for opencl kernels

---
 mlir/include/mlir/Conversion/Passes.td        |  3 +
 .../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp  | 59 +++++++++++++++++++
 .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir         | 14 +++++
 3 files changed, 76 insertions(+)

diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 7bde9e490e4f4e..05f07421b8f526 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -542,6 +542,9 @@ def ConvertGpuOpsToLLVMSPVOps : Pass<"convert-gpu-to-llvm-spv", "gpu::GPUModuleO
     Option<"indexBitwidth", "index-bitwidth", "unsigned",
            /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
            "Bitwidth of the index type, 0 to use size of machine word">,
+    Option<"forceOpenclAddressSpaces", "force-opencl-address-spaces",
+           "bool", /*default=*/"false",
+           "Force kernel argument pointers to have address space global.">,
   ];
 }
 
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index ced4236402923a..25cf6560257978 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -306,6 +306,51 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
   }
 };
 
+class MemorySpaceToOpenCLMemorySpaceConverter : public TypeConverter {
+public:
+  explicit MemorySpaceToOpenCLMemorySpaceConverter() {
+    addConversion([](Type t) { return t; });
+    addConversion(
+        [this](BaseMemRefType memRefType) -> std::optional<Type> {
+          std::optional<gpu::AddressSpace> addrSpace =
+              memorySpaceMap(memRefType.getMemorySpace());
+          if (!addrSpace) {
+            LLVM_DEBUG(
+                llvm::dbgs()
+                << "cannot convert " << memRefType
+                << " due to being unable to find address space in the map\n");
+            return std::nullopt;
+          }
+          auto addrSpaceAttr =
+              gpu::AddressSpaceAttr::get(memRefType.getContext(), *addrSpace);
+          if (auto rankedType = dyn_cast<MemRefType>(memRefType)) {
+            return MemRefType::get(memRefType.getShape(),
+                                   memRefType.getElementType(),
+                                   rankedType.getLayout(), addrSpaceAttr);
+          }
+          return UnrankedMemRefType::get(memRefType.getElementType(),
+                                         addrSpaceAttr);
+        });
+    addConversion([this](FunctionType type) {
+      auto inputs = llvm::map_to_vector(
+          type.getInputs(), [this](Type ty) { return convertType(ty); });
+      auto results = llvm::map_to_vector(
+          type.getResults(), [this](Type ty) { return convertType(ty); });
+      return FunctionType::get(type.getContext(), inputs, results);
+    });
+  }
+
+private:
+  std::optional<gpu::AddressSpace> memorySpaceMap(Attribute memSpaceAttr) {
+    if (!memSpaceAttr)
+      return gpu::AddressSpace::Global;
+    auto gpuAddrSpace = dyn_cast<gpu::AddressSpaceAttr>(memSpaceAttr);
+    if (!gpuAddrSpace)
+      return std::nullopt;
+    return gpuAddrSpace.getValue();
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // GPU To LLVM-SPV Pass.
 //===----------------------------------------------------------------------===//
@@ -325,6 +370,20 @@ struct GPUToLLVMSPVConversionPass final
     LLVMTypeConverter converter(context, options);
     LLVMConversionTarget target(*context);
 
+    if (forceOpenclAddressSpaces) {
+      MemorySpaceToOpenCLMemorySpaceConverter converter;
+      AttrTypeReplacer replacer;
+      replacer.addReplacement([&converter](BaseMemRefType origType)
+                                  -> std::optional<BaseMemRefType> {
+        return converter.convertType<BaseMemRefType>(origType);
+      });
+
+      replacer.recursivelyReplaceElementsIn(getOperation(),
+                                            /*replaceAttrs=*/true,
+                                            /*replaceLocs=*/false,
+                                            /*replaceTypes=*/true);
+    }
+
     target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp,
                         gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
                         gpu::ReturnOp, gpu::ShuffleOp, gpu::ThreadIdOp>();
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index ec4f4a304d5073..d100f36ae42521 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -2,6 +2,8 @@
 // RUN: | FileCheck --check-prefixes=CHECK-64,CHECK %s
 // RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(convert-gpu-to-llvm-spv{index-bitwidth=32}))" -split-input-file -verify-diagnostics %s \
 // RUN: | FileCheck --check-prefixes=CHECK-32,CHECK %s
+// RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(convert-gpu-to-llvm-spv{force-opencl-address-spaces}))" -split-input-file -verify-diagnostics %s \
+// RUN: | FileCheck --check-prefixes=OPENCL %s
 
 gpu.module @builtins {
   // CHECK-64:        llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64 attributes {
@@ -515,3 +517,15 @@ gpu.module @kernels {
     gpu.return
   }
 }
+
+// -----
+
+gpu.module @kernels {
+// OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces(
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+  gpu.func @no_address_spaces(%arg0: memref<f32>, %arg1: memref<f32, #gpu.address_space<global>>, %arg2: memref<f32>) {
+    gpu.return
+  }
+}

>From 9238460b818648defac1f0eaab5be0b2963af403 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Thu, 22 Aug 2024 15:15:46 +0000
Subject: [PATCH 02/16] address review comment

---
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 25cf6560257978..5fda6a83fd8213 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -306,9 +306,9 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
   }
 };
 
-class MemorySpaceToOpenCLMemorySpaceConverter : public TypeConverter {
+class MemorySpaceToOpenCLMemorySpaceConverter final : public TypeConverter {
 public:
-  explicit MemorySpaceToOpenCLMemorySpaceConverter() {
+  MemorySpaceToOpenCLMemorySpaceConverter() {
     addConversion([](Type t) { return t; });
     addConversion(
         [this](BaseMemRefType memRefType) -> std::optional<Type> {

>From 7212344e18485804857b96047946ca392aa1cc19 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.kurapov at gmail.com>
Date: Mon, 26 Aug 2024 13:12:40 +0200
Subject: [PATCH 03/16] Update
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp

Co-authored-by: Victor Perez <victor.perez at intel.com>
---
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 5fda6a83fd8213..3db84678c2d48f 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -344,10 +344,7 @@ class MemorySpaceToOpenCLMemorySpaceConverter final : public TypeConverter {
   std::optional<gpu::AddressSpace> memorySpaceMap(Attribute memSpaceAttr) {
     if (!memSpaceAttr)
       return gpu::AddressSpace::Global;
-    auto gpuAddrSpace = dyn_cast<gpu::AddressSpaceAttr>(memSpaceAttr);
-    if (!gpuAddrSpace)
-      return std::nullopt;
-    return gpuAddrSpace.getValue();
+    return std::nullopt;
   }
 };
 

>From 1961dffdb3e02ae4244eb2e0cd096cb64fc1b9a5 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Mon, 26 Aug 2024 11:15:51 +0000
Subject: [PATCH 04/16] add debug type

---
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 3db84678c2d48f..a9c6e76b506e61 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -34,6 +34,8 @@
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/FormatVariadic.h"
 
+#define DEBUG_TYPE "gpu-to-llvm-spv"
+
 using namespace mlir;
 
 namespace mlir {

>From 53baba218022fb2f0344bb6a9a5d2c9688ed5139 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Thu, 29 Aug 2024 14:23:03 +0000
Subject: [PATCH 05/16] Add func & memref to llvm conversion and test a more
 complex caller/callee example

---
 .../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp  | 48 ++++++++-----------
 .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir         | 24 ++++++++++
 2 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index a9c6e76b506e61..6cbc8069c5bf68 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -9,12 +9,14 @@
 #include "mlir/Conversion/GPUToLLVMSPV/GPUToLLVMSPVPass.h"
 
 #include "../GPUCommon/GPUOpsLowering.h"
+#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
 #include "mlir/Conversion/GPUCommon/AttrToSPIRVConverter.h"
 #include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
@@ -312,27 +314,22 @@ class MemorySpaceToOpenCLMemorySpaceConverter final : public TypeConverter {
 public:
   MemorySpaceToOpenCLMemorySpaceConverter() {
     addConversion([](Type t) { return t; });
-    addConversion(
-        [this](BaseMemRefType memRefType) -> std::optional<Type> {
-          std::optional<gpu::AddressSpace> addrSpace =
-              memorySpaceMap(memRefType.getMemorySpace());
-          if (!addrSpace) {
-            LLVM_DEBUG(
-                llvm::dbgs()
-                << "cannot convert " << memRefType
-                << " due to being unable to find address space in the map\n");
-            return std::nullopt;
-          }
-          auto addrSpaceAttr =
-              gpu::AddressSpaceAttr::get(memRefType.getContext(), *addrSpace);
-          if (auto rankedType = dyn_cast<MemRefType>(memRefType)) {
-            return MemRefType::get(memRefType.getShape(),
-                                   memRefType.getElementType(),
-                                   rankedType.getLayout(), addrSpaceAttr);
-          }
-          return UnrankedMemRefType::get(memRefType.getElementType(),
-                                         addrSpaceAttr);
-        });
+    addConversion([this](BaseMemRefType memRefType) -> std::optional<Type> {
+      // Attach global addr space attribute to memrefs with no addr space attr
+      Attribute memSpaceAttr = memRefType.getMemorySpace();
+      if (memSpaceAttr)
+        return std::nullopt;
+
+      auto addrSpaceAttr = gpu::AddressSpaceAttr::get(
+          memRefType.getContext(), gpu::AddressSpace::Global);
+      if (auto rankedType = dyn_cast<MemRefType>(memRefType)) {
+        return MemRefType::get(memRefType.getShape(),
+                               memRefType.getElementType(),
+                               rankedType.getLayout(), addrSpaceAttr);
+      }
+      return UnrankedMemRefType::get(memRefType.getElementType(),
+                                     addrSpaceAttr);
+    });
     addConversion([this](FunctionType type) {
       auto inputs = llvm::map_to_vector(
           type.getInputs(), [this](Type ty) { return convertType(ty); });
@@ -341,13 +338,6 @@ class MemorySpaceToOpenCLMemorySpaceConverter final : public TypeConverter {
       return FunctionType::get(type.getContext(), inputs, results);
     });
   }
-
-private:
-  std::optional<gpu::AddressSpace> memorySpaceMap(Attribute memSpaceAttr) {
-    if (!memSpaceAttr)
-      return gpu::AddressSpace::Global;
-    return std::nullopt;
-  }
 };
 
 //===----------------------------------------------------------------------===//
@@ -388,6 +378,8 @@ struct GPUToLLVMSPVConversionPass final
                         gpu::ReturnOp, gpu::ShuffleOp, gpu::ThreadIdOp>();
 
     populateGpuToLLVMSPVConversionPatterns(converter, patterns);
+    populateFuncToLLVMConversionPatterns(converter, patterns);
+    populateFinalizeMemRefToLLVMConversionPatterns(converter, patterns);
     populateGpuMemorySpaceAttributeConversions(converter);
 
     if (failed(applyPartialConversion(getOperation(), target,
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index d100f36ae42521..481ae574a3fd4b 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -521,6 +521,7 @@ gpu.module @kernels {
 // -----
 
 gpu.module @kernels {
+// OPENCL:        llvm.func spir_funccc @_Z12get_group_idj(i32)
 // OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces(
 // OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
 // OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
@@ -528,4 +529,27 @@ gpu.module @kernels {
   gpu.func @no_address_spaces(%arg0: memref<f32>, %arg1: memref<f32, #gpu.address_space<global>>, %arg2: memref<f32>) {
     gpu.return
   }
+
+// OPENCL-LABEL:   llvm.func spir_kernelcc @no_address_spaces_complex(
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// OPENCL:         llvm.call @no_address_spaces_callee
+  gpu.func @no_address_spaces_complex(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) kernel {
+    func.call @no_address_spaces_callee(%arg0, %arg1) : (memref<2x2xf32>, memref<4xf32>) -> ()
+    gpu.return
+  }
+// OPENCL-LABEL:   llvm.func @no_address_spaces_callee(
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// OPENCL:         [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32
+// OPENCL:         llvm.call spir_funccc @_Z12get_group_idj([[C0]]) {
+// OPENCL:         [[LD:%.*]] = llvm.load
+// OPENCL:         llvm.store [[LD]]
+  func.func @no_address_spaces_callee(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) {
+    %block_id = gpu.block_id x
+    %0 = memref.load %arg0[%block_id, %block_id] : memref<2x2xf32>
+    memref.store %0, %arg1[%block_id] : memref<4xf32>
+    func.return
+  }
+
 }

>From 4b18490fccf6f8ad3cfef96f109ac13d59624080 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Wed, 11 Sep 2024 11:43:43 +0000
Subject: [PATCH 06/16] Update calling convention for func.func after the
 lowering

---
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp   | 13 +++++++++++++
 .../Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir    |  4 ++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 6cbc8069c5bf68..16ff23eb2bcf50 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -385,6 +385,19 @@ struct GPUToLLVMSPVConversionPass final
     if (failed(applyPartialConversion(getOperation(), target,
                                       std::move(patterns))))
       signalPassFailure();
+
+    // `func.func`s are not handled by the lowering, so need a proper calling
+    // convention set separately.
+    getOperation().walk([&](LLVM::LLVMFuncOp f) {
+      if (f.getCConv() == LLVM::CConv::C) {
+        f.setCConv(LLVM::CConv::SPIR_FUNC);
+      }
+    });
+    getOperation().walk([&](LLVM::CallOp c) {
+      if (c.getCConv() == LLVM::CConv::C) {
+        c.setCConv(LLVM::CConv::SPIR_FUNC);
+      }
+    });
   }
 };
 } // namespace
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 481ae574a3fd4b..2a629e814ecc2d 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -533,12 +533,12 @@ gpu.module @kernels {
 // OPENCL-LABEL:   llvm.func spir_kernelcc @no_address_spaces_complex(
 // OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
 // OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// OPENCL:         llvm.call @no_address_spaces_callee
+// OPENCL:         llvm.call spir_funccc @no_address_spaces_callee
   gpu.func @no_address_spaces_complex(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) kernel {
     func.call @no_address_spaces_callee(%arg0, %arg1) : (memref<2x2xf32>, memref<4xf32>) -> ()
     gpu.return
   }
-// OPENCL-LABEL:   llvm.func @no_address_spaces_callee(
+// OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces_callee(
 // OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
 // OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
 // OPENCL:         [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32

>From dfbcd231b8dd87f35110600023135ccc8cdf84be Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.kurapov at gmail.com>
Date: Wed, 11 Sep 2024 16:40:29 +0200
Subject: [PATCH 07/16] Update
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp

Co-authored-by: Jakub Kuderski <kubakuderski at gmail.com>
---
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 16ff23eb2bcf50..6110ba92edc268 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -388,12 +388,12 @@ struct GPUToLLVMSPVConversionPass final
 
     // `func.func`s are not handled by the lowering, so need a proper calling
     // convention set separately.
-    getOperation().walk([&](LLVM::LLVMFuncOp f) {
+    getOperation().walk([](LLVM::LLVMFuncOp f) {
       if (f.getCConv() == LLVM::CConv::C) {
         f.setCConv(LLVM::CConv::SPIR_FUNC);
       }
     });
-    getOperation().walk([&](LLVM::CallOp c) {
+    getOperation().walk([](LLVM::CallOp c) {
       if (c.getCConv() == LLVM::CConv::C) {
         c.setCConv(LLVM::CConv::SPIR_FUNC);
       }

>From b39e0559db7147362a3dd138055efa9b12bd4f6a Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Fri, 13 Sep 2024 09:35:56 +0000
Subject: [PATCH 08/16] Rename OPENCL to OPENCL-CHECK

---
 .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir         | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 2a629e814ecc2d..89bcb616e3f228 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -3,7 +3,7 @@
 // RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(convert-gpu-to-llvm-spv{index-bitwidth=32}))" -split-input-file -verify-diagnostics %s \
 // RUN: | FileCheck --check-prefixes=CHECK-32,CHECK %s
 // RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(convert-gpu-to-llvm-spv{force-opencl-address-spaces}))" -split-input-file -verify-diagnostics %s \
-// RUN: | FileCheck --check-prefixes=OPENCL %s
+// RUN: | FileCheck --check-prefixes=CHECK-OPENCL %s
 
 gpu.module @builtins {
   // CHECK-64:        llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64 attributes {
@@ -521,30 +521,30 @@ gpu.module @kernels {
 // -----
 
 gpu.module @kernels {
-// OPENCL:        llvm.func spir_funccc @_Z12get_group_idj(i32)
-// OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces(
-// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-OPENCL:        llvm.func spir_funccc @_Z12get_group_idj(i32)
+// CHECK-OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces(
+// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
   gpu.func @no_address_spaces(%arg0: memref<f32>, %arg1: memref<f32, #gpu.address_space<global>>, %arg2: memref<f32>) {
     gpu.return
   }
 
-// OPENCL-LABEL:   llvm.func spir_kernelcc @no_address_spaces_complex(
-// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// OPENCL:         llvm.call spir_funccc @no_address_spaces_callee
+// CHECK-OPENCL-LABEL:   llvm.func spir_kernelcc @no_address_spaces_complex(
+// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-OPENCL:         llvm.call spir_funccc @no_address_spaces_callee
   gpu.func @no_address_spaces_complex(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) kernel {
     func.call @no_address_spaces_callee(%arg0, %arg1) : (memref<2x2xf32>, memref<4xf32>) -> ()
     gpu.return
   }
-// OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces_callee(
-// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// OPENCL:         [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32
-// OPENCL:         llvm.call spir_funccc @_Z12get_group_idj([[C0]]) {
-// OPENCL:         [[LD:%.*]] = llvm.load
-// OPENCL:         llvm.store [[LD]]
+// CHECK-OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces_callee(
+// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-OPENCL:         [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32
+// CHECK-OPENCL:         llvm.call spir_funccc @_Z12get_group_idj([[C0]]) {
+// CHECK-OPENCL:         [[LD:%.*]] = llvm.load
+// CHECK-OPENCL:         llvm.store [[LD]]
   func.func @no_address_spaces_callee(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) {
     %block_id = gpu.block_id x
     %0 = memref.load %arg0[%block_id, %block_id] : memref<2x2xf32>

>From d5519a53e9c406cfae8291210776febcd2dfa5fd Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Thu, 19 Sep 2024 08:53:34 +0000
Subject: [PATCH 09/16] remove calling convention updates to func.func

---
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp   | 13 -------------
 .../Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir    |  4 ++--
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 6110ba92edc268..6cbc8069c5bf68 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -385,19 +385,6 @@ struct GPUToLLVMSPVConversionPass final
     if (failed(applyPartialConversion(getOperation(), target,
                                       std::move(patterns))))
       signalPassFailure();
-
-    // `func.func`s are not handled by the lowering, so need a proper calling
-    // convention set separately.
-    getOperation().walk([](LLVM::LLVMFuncOp f) {
-      if (f.getCConv() == LLVM::CConv::C) {
-        f.setCConv(LLVM::CConv::SPIR_FUNC);
-      }
-    });
-    getOperation().walk([](LLVM::CallOp c) {
-      if (c.getCConv() == LLVM::CConv::C) {
-        c.setCConv(LLVM::CConv::SPIR_FUNC);
-      }
-    });
   }
 };
 } // namespace
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 89bcb616e3f228..11455eb84a1aeb 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -538,11 +538,11 @@ gpu.module @kernels {
     func.call @no_address_spaces_callee(%arg0, %arg1) : (memref<2x2xf32>, memref<4xf32>) -> ()
     gpu.return
   }
-// CHECK-OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces_callee(
+// CHECK-OPENCL-LABEL:   llvm.func @no_address_spaces_callee(
 // CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
 // CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
 // CHECK-OPENCL:         [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK-OPENCL:         llvm.call spir_funccc @_Z12get_group_idj([[C0]]) {
+// CHECK-OPENCL:         llvm.call @_Z12get_group_idj([[C0]]) {
 // CHECK-OPENCL:         [[LD:%.*]] = llvm.load
 // CHECK-OPENCL:         llvm.store [[LD]]
   func.func @no_address_spaces_callee(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) {

>From 598f9b14d9cbb82639e78796500b22293daa7636 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.kurapov at gmail.com>
Date: Thu, 19 Sep 2024 12:02:48 +0200
Subject: [PATCH 10/16] Update
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp

Co-authored-by: Victor Perez <victor.perez at intel.com>
---
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 6cbc8069c5bf68..9841c27bae812e 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -314,7 +314,7 @@ class MemorySpaceToOpenCLMemorySpaceConverter final : public TypeConverter {
 public:
   MemorySpaceToOpenCLMemorySpaceConverter() {
     addConversion([](Type t) { return t; });
-    addConversion([this](BaseMemRefType memRefType) -> std::optional<Type> {
+    addConversion([](BaseMemRefType memRefType) -> std::optional<Type> {
       // Attach global addr space attribute to memrefs with no addr space attr
       Attribute memSpaceAttr = memRefType.getMemorySpace();
       if (memSpaceAttr)

>From 3fde4ae5773376bbcbb36b61ef68f94f50d856f7 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Thu, 19 Sep 2024 13:44:35 +0000
Subject: [PATCH 11/16] Make forcing opencl address spaces the default behavior

---
 mlir/include/mlir/Conversion/Passes.td        |  5 +-
 .../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp  |  3 +-
 .../GPUToLLVMSPV/gpu-to-llvm-spv.mlir         | 46 +++++++++----------
 3 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 05f07421b8f526..a84eb6eb19cc32 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -541,10 +541,7 @@ def ConvertGpuOpsToLLVMSPVOps : Pass<"convert-gpu-to-llvm-spv", "gpu::GPUModuleO
   let options = [
     Option<"indexBitwidth", "index-bitwidth", "unsigned",
            /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
-           "Bitwidth of the index type, 0 to use size of machine word">,
-    Option<"forceOpenclAddressSpaces", "force-opencl-address-spaces",
-           "bool", /*default=*/"false",
-           "Force kernel argument pointers to have address space global.">,
+           "Bitwidth of the index type, 0 to use size of machine word">
   ];
 }
 
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 9841c27bae812e..18106b7ab78de5 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -359,7 +359,8 @@ struct GPUToLLVMSPVConversionPass final
     LLVMTypeConverter converter(context, options);
     LLVMConversionTarget target(*context);
 
-    if (forceOpenclAddressSpaces) {
+    // Force OpenCL address spaces when they are not present
+    {
       MemorySpaceToOpenCLMemorySpaceConverter converter;
       AttrTypeReplacer replacer;
       replacer.addReplacement([&converter](BaseMemRefType origType)
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index 11455eb84a1aeb..d25fa8c220ea0a 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -2,8 +2,6 @@
 // RUN: | FileCheck --check-prefixes=CHECK-64,CHECK %s
 // RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(convert-gpu-to-llvm-spv{index-bitwidth=32}))" -split-input-file -verify-diagnostics %s \
 // RUN: | FileCheck --check-prefixes=CHECK-32,CHECK %s
-// RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(convert-gpu-to-llvm-spv{force-opencl-address-spaces}))" -split-input-file -verify-diagnostics %s \
-// RUN: | FileCheck --check-prefixes=CHECK-OPENCL %s
 
 gpu.module @builtins {
   // CHECK-64:        llvm.func spir_funccc @_Z14get_num_groupsj(i32) -> i64 attributes {
@@ -398,20 +396,20 @@ gpu.module @kernels {
     gpu.return
   }
 
-  // CHECK-64:   llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i64, %{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64) attributes {gpu.kernel} {
-  // CHECK-32:   llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i32, %{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32) attributes {gpu.kernel} {
+  // CHECK-64:   llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i64, %{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i64) attributes {gpu.kernel} {
+  // CHECK-32:   llvm.func spir_kernelcc @kernel_with_conv_args(%{{.*}}: i32, %{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i32) attributes {gpu.kernel} {
   gpu.func @kernel_with_conv_args(%arg0: index, %arg1: memref<index>) kernel {
     gpu.return
   }
 
-  // CHECK-64:   llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
-  // CHECK-32:   llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
+  // CHECK-64:   llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
+  // CHECK-32:   llvm.func spir_kernelcc @kernel_with_sized_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
   gpu.func @kernel_with_sized_memref(%arg0: memref<1xindex>) kernel {
     gpu.return
   }
 
-  // CHECK-64:   llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
-  // CHECK-32:   llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
+  // CHECK-64:   llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64) attributes {gpu.kernel} {
+  // CHECK-32:   llvm.func spir_kernelcc @kernel_with_ND_memref(%{{.*}}: !llvm.ptr<1>, %{{.*}}: !llvm.ptr<1>, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32, %{{.*}}: i32) attributes {gpu.kernel} {
   gpu.func @kernel_with_ND_memref(%arg0: memref<128x128x128xindex>) kernel {
     gpu.return
   }
@@ -521,30 +519,30 @@ gpu.module @kernels {
 // -----
 
 gpu.module @kernels {
-// CHECK-OPENCL:        llvm.func spir_funccc @_Z12get_group_idj(i32)
-// CHECK-OPENCL-LABEL:   llvm.func spir_funccc @no_address_spaces(
-// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK:         llvm.func spir_funccc @_Z12get_group_idj(i32)
+// CHECK-LABEL:   llvm.func spir_funccc @no_address_spaces(
+// CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
   gpu.func @no_address_spaces(%arg0: memref<f32>, %arg1: memref<f32, #gpu.address_space<global>>, %arg2: memref<f32>) {
     gpu.return
   }
 
-// CHECK-OPENCL-LABEL:   llvm.func spir_kernelcc @no_address_spaces_complex(
-// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// CHECK-OPENCL:         llvm.call spir_funccc @no_address_spaces_callee
+// CHECK-LABEL:   llvm.func spir_kernelcc @no_address_spaces_complex(
+// CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK:         llvm.call @no_address_spaces_callee
   gpu.func @no_address_spaces_complex(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) kernel {
     func.call @no_address_spaces_callee(%arg0, %arg1) : (memref<2x2xf32>, memref<4xf32>) -> ()
     gpu.return
   }
-// CHECK-OPENCL-LABEL:   llvm.func @no_address_spaces_callee(
-// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// CHECK-OPENCL-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// CHECK-OPENCL:         [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK-OPENCL:         llvm.call @_Z12get_group_idj([[C0]]) {
-// CHECK-OPENCL:         [[LD:%.*]] = llvm.load
-// CHECK-OPENCL:         llvm.store [[LD]]
+// CHECK-LABEL:   llvm.func @no_address_spaces_callee(
+// CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK:         [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32
+// CHECK:         llvm.call spir_funccc @_Z12get_group_idj([[C0]]) {
+// CHECK:         [[LD:%.*]] = llvm.load
+// CHECK:         llvm.store [[LD]]
   func.func @no_address_spaces_callee(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) {
     %block_id = gpu.block_id x
     %0 = memref.load %arg0[%block_id, %block_id] : memref<2x2xf32>

>From 71006c69fc7a05a8b31b78210c4d8fa1085d8823 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Thu, 19 Sep 2024 14:45:29 +0000
Subject: [PATCH 12/16] put the comma back

---
 mlir/include/mlir/Conversion/Passes.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index a84eb6eb19cc32..7bde9e490e4f4e 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -541,7 +541,7 @@ def ConvertGpuOpsToLLVMSPVOps : Pass<"convert-gpu-to-llvm-spv", "gpu::GPUModuleO
   let options = [
     Option<"indexBitwidth", "index-bitwidth", "unsigned",
            /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
-           "Bitwidth of the index type, 0 to use size of machine word">
+           "Bitwidth of the index type, 0 to use size of machine word">,
   ];
 }
 

>From a1666d65a95d639deca7e0514076b6cca025027f Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Thu, 19 Sep 2024 15:56:40 +0000
Subject: [PATCH 13/16] Insert integer attributes instead of the gpu address
 space

---
 .../lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 11 ++++-------
 .../Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 16 +++++++++-------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 18106b7ab78de5..b8f832c59d727b 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -312,16 +312,15 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
 
 class MemorySpaceToOpenCLMemorySpaceConverter final : public TypeConverter {
 public:
-  MemorySpaceToOpenCLMemorySpaceConverter() {
+  MemorySpaceToOpenCLMemorySpaceConverter(MLIRContext *ctx) {
     addConversion([](Type t) { return t; });
-    addConversion([](BaseMemRefType memRefType) -> std::optional<Type> {
+    addConversion([ctx](BaseMemRefType memRefType) -> std::optional<Type> {
       // Attach global addr space attribute to memrefs with no addr space attr
       Attribute memSpaceAttr = memRefType.getMemorySpace();
       if (memSpaceAttr)
         return std::nullopt;
 
-      auto addrSpaceAttr = gpu::AddressSpaceAttr::get(
-          memRefType.getContext(), gpu::AddressSpace::Global);
+      Attribute addrSpaceAttr = IntegerAttr::get(IntegerType::get(ctx, 64), 1);
       if (auto rankedType = dyn_cast<MemRefType>(memRefType)) {
         return MemRefType::get(memRefType.getShape(),
                                memRefType.getElementType(),
@@ -361,7 +360,7 @@ struct GPUToLLVMSPVConversionPass final
 
     // Force OpenCL address spaces when they are not present
     {
-      MemorySpaceToOpenCLMemorySpaceConverter converter;
+      MemorySpaceToOpenCLMemorySpaceConverter converter(context);
       AttrTypeReplacer replacer;
       replacer.addReplacement([&converter](BaseMemRefType origType)
                                   -> std::optional<BaseMemRefType> {
@@ -379,8 +378,6 @@ struct GPUToLLVMSPVConversionPass final
                         gpu::ReturnOp, gpu::ShuffleOp, gpu::ThreadIdOp>();
 
     populateGpuToLLVMSPVConversionPatterns(converter, patterns);
-    populateFuncToLLVMConversionPatterns(converter, patterns);
-    populateFinalizeMemRefToLLVMConversionPatterns(converter, patterns);
     populateGpuMemorySpaceAttributeConversions(converter);
 
     if (failed(applyPartialConversion(getOperation(), target,
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index d25fa8c220ea0a..e69359dd10e041 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -531,18 +531,20 @@ gpu.module @kernels {
 // CHECK-LABEL:   llvm.func spir_kernelcc @no_address_spaces_complex(
 // CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
 // CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// CHECK:         llvm.call @no_address_spaces_callee
+// CHECK:         func.call @no_address_spaces_callee(%{{[0-9]+}}, %{{[0-9]+}})
+// CHECK-SAME:                                        : (memref<2x2xf32, 1>, memref<4xf32, 1>)
   gpu.func @no_address_spaces_complex(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) kernel {
     func.call @no_address_spaces_callee(%arg0, %arg1) : (memref<2x2xf32>, memref<4xf32>) -> ()
     gpu.return
   }
-// CHECK-LABEL:   llvm.func @no_address_spaces_callee(
-// CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
-// CHECK-SAME:                                             %{{[a-zA-Z_][a-zA-Z0-9_]*}}: !llvm.ptr<1>
+// CHECK-LABEL:   func.func @no_address_spaces_callee(
+// CHECK-SAME:                                             [[ARG0:%.*]]: memref<2x2xf32, 1> 
+// CHECK-SAME:                                             [[ARG1:%.*]]: memref<4xf32, 1>
 // CHECK:         [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK:         llvm.call spir_funccc @_Z12get_group_idj([[C0]]) {
-// CHECK:         [[LD:%.*]] = llvm.load
-// CHECK:         llvm.store [[LD]]
+// CHECK:         [[I0:%.*]] = llvm.call spir_funccc @_Z12get_group_idj([[C0]]) {
+// CHECK:         [[I1:%.*]] = builtin.unrealized_conversion_cast [[I0]] : i64 to index
+// CHECK:         [[LD:%.*]] = memref.load [[ARG0]]{{\[}}[[I1]], [[I1]]{{\]}} : memref<2x2xf32, 1>
+// CHECK:         memref.store [[LD]], [[ARG1]]{{\[}}[[I1]]{{\]}} : memref<4xf32, 1>
   func.func @no_address_spaces_callee(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) {
     %block_id = gpu.block_id x
     %0 = memref.load %arg0[%block_id, %block_id] : memref<2x2xf32>

>From c2b22c7c3a0ef807376fdf4663c2a613516d7c67 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Fri, 20 Sep 2024 12:46:16 +0000
Subject: [PATCH 14/16] Avoid hardcoding default address space value

---
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index b8f832c59d727b..1d3d07f23a004b 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -320,7 +320,10 @@ class MemorySpaceToOpenCLMemorySpaceConverter final : public TypeConverter {
       if (memSpaceAttr)
         return std::nullopt;
 
-      Attribute addrSpaceAttr = IntegerAttr::get(IntegerType::get(ctx, 64), 1);
+      unsigned globalAddrspace = storageClassToAddressSpace(
+          spirv::ClientAPI::OpenCL, spirv::StorageClass::CrossWorkgroup);
+      Attribute addrSpaceAttr =
+          IntegerAttr::get(IntegerType::get(ctx, 64), globalAddrspace);
       if (auto rankedType = dyn_cast<MemRefType>(memRefType)) {
         return MemRefType::get(memRefType.getShape(),
                                memRefType.getElementType(),

>From a0a5a00fecfa36ca71687ca38fc475cf6588a0bc Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Fri, 20 Sep 2024 12:46:35 +0000
Subject: [PATCH 15/16] Fix 32-bit case

---
 mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index e69359dd10e041..94572b96c66221 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -542,7 +542,8 @@ gpu.module @kernels {
 // CHECK-SAME:                                             [[ARG1:%.*]]: memref<4xf32, 1>
 // CHECK:         [[C0:%.*]] = llvm.mlir.constant(0 : i32) : i32
 // CHECK:         [[I0:%.*]] = llvm.call spir_funccc @_Z12get_group_idj([[C0]]) {
-// CHECK:         [[I1:%.*]] = builtin.unrealized_conversion_cast [[I0]] : i64 to index
+// CHECK-32:         [[I1:%.*]] = builtin.unrealized_conversion_cast [[I0]] : i32 to index
+// CHECK-64:         [[I1:%.*]] = builtin.unrealized_conversion_cast [[I0]] : i64 to index
 // CHECK:         [[LD:%.*]] = memref.load [[ARG0]]{{\[}}[[I1]], [[I1]]{{\]}} : memref<2x2xf32, 1>
 // CHECK:         memref.store [[LD]], [[ARG1]]{{\[}}[[I1]]{{\]}} : memref<4xf32, 1>
   func.func @no_address_spaces_callee(%arg0: memref<2x2xf32>, %arg1: memref<4xf32>) {

>From abb6ef95669f358684c90bc46fb80859a77d5180 Mon Sep 17 00:00:00 2001
From: Petr Kurapov <petr.a.kurapov at intel.com>
Date: Mon, 23 Sep 2024 09:44:35 +0000
Subject: [PATCH 16/16] Drop unused includes

---
 mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 1d3d07f23a004b..525c2dd0262c63 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -9,14 +9,12 @@
 #include "mlir/Conversion/GPUToLLVMSPV/GPUToLLVMSPVPass.h"
 
 #include "../GPUCommon/GPUOpsLowering.h"
-#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
 #include "mlir/Conversion/GPUCommon/AttrToSPIRVConverter.h"
 #include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
-#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMAttrs.h"



More information about the Mlir-commits mailing list