[Mlir-commits] [mlir] [mlir][gpu][RFC] Add a source language enum attribute to gpu.func and gpu.launch ops (PR #133163)

Wed Mar 26 14:02:42 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

<details>
<summary>Changes</summary>

As we experiment with implementing OpenACC and CUDA Fortran using parts of the GPU dialect, we need to distinguish the source language of each kernel.

Kernel launches for OpenACC and CUDA Fortran use different mechanisms, and we would like to have this information attached to the kernel (`gpu.func` or `gpu.launch`) so that the conversion pattern that rewrites the launch operation can make the appropriate decision about which API to use.

This patch adds an enum attribute to carry the source language information on `gpu.func` and `gpu.launch` operations. The kernel outlining pass is also updated to propagate the information from `gpu.launch` to the newly created `gpu.func`.

---
Full diff: https://github.com/llvm/llvm-project/pull/133163.diff


4 Files Affected:

- (modified) mlir/include/mlir/Dialect/GPU/IR/GPUOps.td (+43-21) 
- (modified) mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp (+1) 
- (modified) mlir/test/Dialect/GPU/ops.mlir (+13) 
- (modified) mlir/test/Dialect/GPU/outlining.mlir (+25) 


``````````diff

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 2b1ce573effd0..0793fdc427168 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -351,6 +351,23 @@ def GPU_OptionalDimSizeHintAttr : ConfinedAttr<OptionalAttr<DenseI32ArrayAttr>,
   [AttrConstraint<Or<[IsNullAttr.predicate, DenseArrayCount<3>.predicate]>,
     "with 3 elements (if present)">]>;
 
+// Source language of the gpu.func or gpu.launch operations.
+def GPU_KernelSourceLangOpenACC : I32EnumAttrCase<"OpenACC", 0, "openacc">;
+def GPU_KernelSourceLangOpenMP : I32EnumAttrCase<"OpenMP", 1, "openmp">;
+def GPU_KernelSourceLangCUDAFortran
+    : I32EnumAttrCase<"CUDAFortran", 2, "cuda_fortran">;
+
+def GPU_KernelSourceLang
+    : I32EnumAttr<"KernelSourceLang", "Source language of a kernel",
+                  [GPU_KernelSourceLangOpenACC, GPU_KernelSourceLangOpenMP,
+                   GPU_KernelSourceLangCUDAFortran,
+]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::gpu";
+}
+def GPU_KernelSourceLangAttr
+    : EnumAttr<GPU_Dialect, GPU_KernelSourceLang, "kernel_source_lang">;
+
 def GPU_GPUFuncOp : GPU_Op<"func", [
     HasParent<"GPUModuleOp">, AutomaticAllocationScope, FunctionOpInterface,
     IsolatedFromAbove, AffineScope
@@ -426,12 +443,13 @@ def GPU_GPUFuncOp : GPU_Op<"func", [
   }];
 
   let arguments = (ins TypeAttrOf<FunctionType>:$function_type,
-                       OptionalAttr<DictArrayAttr>:$arg_attrs,
-                       OptionalAttr<DictArrayAttr>:$res_attrs,
-                       OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
-                       OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
-                       GPU_OptionalDimSizeHintAttr:$known_block_size,
-                       GPU_OptionalDimSizeHintAttr:$known_grid_size);
+      OptionalAttr<DictArrayAttr>:$arg_attrs,
+      OptionalAttr<DictArrayAttr>:$res_attrs,
+      OptionalAttr<DictArrayAttr>:$workgroup_attrib_attrs,
+      OptionalAttr<DictArrayAttr>:$private_attrib_attrs,
+      GPU_OptionalDimSizeHintAttr:$known_block_size,
+      GPU_OptionalDimSizeHintAttr:$known_grid_size,
+      OptionalAttr<GPU_KernelSourceLangAttr>:$kernel_source_lang);
   let regions = (region AnyRegion:$body);
 
   let skipDefaultBuilders = 1;
@@ -793,20 +811,21 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
   let hasVerifier = 1;
 }
 
-def GPU_LaunchOp : GPU_Op<"launch", [
-      AffineScope, AutomaticAllocationScope, AttrSizedOperandSegments,
-      DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
-      GPU_AsyncOpInterface, RecursiveMemoryEffects]>,
-    Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
-               Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
-               Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
-               Optional<Index>:$clusterSizeX,
-               Optional<Index>:$clusterSizeY,
-               Optional<Index>:$clusterSizeZ,
-               Optional<I32>:$dynamicSharedMemorySize,
-               OptionalAttr<SymbolRefAttr>:$kernelFunc,
-               OptionalAttr<SymbolRefAttr>:$kernelModule)>,
-    Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
+def GPU_LaunchOp
+    : GPU_Op<"launch", [AffineScope, AutomaticAllocationScope,
+                        AttrSizedOperandSegments,
+                        DeclareOpInterfaceMethods<
+                            InferIntRangeInterface, ["inferResultRanges"]>,
+                        GPU_AsyncOpInterface, RecursiveMemoryEffects]>,
+      Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
+          Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
+          Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
+          Optional<Index>:$clusterSizeX, Optional<Index>:$clusterSizeY,
+          Optional<Index>:$clusterSizeZ, Optional<I32>:$dynamicSharedMemorySize,
+          OptionalAttr<SymbolRefAttr>:$kernelFunc,
+          OptionalAttr<SymbolRefAttr>:$kernelModule,
+          OptionalAttr<GPU_KernelSourceLangAttr>:$kernelSourceLang)>,
+      Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
   let summary = "GPU kernel launch operation";
 
   let description = [{
@@ -840,7 +859,10 @@ def GPU_LaunchOp : GPU_Op<"launch", [
     -   a variadic number of Private memory attributions.
 
     The `kernelFunc` and `kernelModule` attributes are optional and specifies
-    the kernel name and a module in which the kernel should be outlined. 
+    the kernel name and a module in which the kernel should be outlined.
+
+    The optional `kernelSourceLang` attribute can be set to specify the source
+    language of the kernel.
 
     Syntax:
 
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 62dc1f13412d4..a1f9e5a8381a4 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -202,6 +202,7 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
       TypeRange(ValueRange(launchOp.getPrivateAttributions())));
   outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                         builder.getUnitAttr());
+  outlinedFunc.setKernelSourceLangAttr(launchOp.getKernelSourceLangAttr());
 
   // If we can infer bounds on the grid and/or block sizes from the arguments
   // to the launch op, propagate them to the generated kernel. This is safe
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 99915c493ea46..15279fbe36140 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -14,6 +14,13 @@ module attributes {gpu.container_module} {
       // CHECK: gpu.terminator
       gpu.terminator
     }
+    // CHECK: gpu.launch
+    gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
+               threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
+      // CHECK: gpu.terminator
+      gpu.terminator
+    // CHECK: } {kernelSourceLang = #gpu<kernel_source_lang openmp>}
+    } {kernelSourceLang = #gpu<kernel_source_lang openmp>}
     return
   }
 
@@ -279,6 +286,12 @@ module attributes {gpu.container_module} {
     gpu.func @empty_attribution(%arg0: f32) workgroup() private() {
       gpu.return
     }
+
+    // CHECK-LABEL: gpu.func @source_lang(%{{.*}}: f32) kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>}
+    // CHECK: {
+    gpu.func @source_lang(%arg0: f32) kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>} {
+      gpu.return
+    }
   }
 
   gpu.module @explicit_attributions {
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index d48fa054432d1..0ece7519abe3e 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -630,3 +630,28 @@ func.func @testNoAttributes() {
   }
   return
 }
+
+// -----
+
+// This test tests that the kernelSourceLang is propagated to the gpu.func.
+
+// CHECK-LABEL: func.func @testKernelFuncOnly()
+// CHECK: gpu.launch_func  @testKernelFuncOnly_kernel::@testKernelFuncOnly_kernel
+
+// CHECK: gpu.module @testKernelFuncOnly_kernel
+// CHECK: gpu.func @testKernelFuncOnly_kernel() kernel attributes {kernel_source_lang = #gpu<kernel_source_lang openacc>
+func.func @testKernelFuncOnly() {
+  %gDimX = arith.constant 8 : index
+  %gDimY = arith.constant 12 : index
+  %gDimZ = arith.constant 16 : index
+  %bDimX = arith.constant 32 : index
+  %bDimY = arith.constant 16 : index
+  %bDimZ = arith.constant 8 : index
+
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
+             threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+    "some_op"(%bx, %tx) : (index, index) -> ()
+    gpu.terminator
+  } {kernelSourceLang = #gpu<kernel_source_lang openacc>}
+  return
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/133163