[flang-commits] [flang] Reland '[flang][cuda] Add cuf.register_kernel operation' (PR #112389)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Tue Oct 15 09:18:59 PDT 2024


https://github.com/clementval created https://github.com/llvm/llvm-project/pull/112389

The operation will be used in the CUF constructor to register the kernel functions. This allows registration to be delayed until codegen, when the gpu.binary will be available.

Reland of #112268 with correct shared library build support. 

>From 7d969d6085bed2e2021e76d27ee02a0aac420613 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 14 Oct 2024 14:31:05 -0700
Subject: [PATCH] [flang][cuda] Add cuf.register_kernel operation

The operation will be used in the CUF constructor to register
the kernel functions. This allows registration to be delayed until
codegen, when the gpu.binary will be available.
---
 .../flang/Optimizer/Dialect/CUF/CUFOps.td     | 19 +++++++
 .../lib/Optimizer/Dialect/CUF/CMakeLists.txt  |  1 +
 flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp    | 37 ++++++++++++++
 flang/test/Fir/CUDA/cuda-register-func.fir    | 20 ++++++++
 flang/test/Fir/cuf-invalid.fir                | 50 +++++++++++++++++++
 flang/tools/fir-opt/fir-opt.cpp               |  1 +
 6 files changed, 128 insertions(+)
 create mode 100644 flang/test/Fir/CUDA/cuda-register-func.fir

diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index f643674f1d5d6b..98d1ef529738c7 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -288,4 +288,23 @@ def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
   let hasVerifier = 1;
 }
 
+def cuf_RegisterKernelOp : cuf_Op<"register_kernel", []> {
+  let summary = "Register a CUDA kernel";
+
+  let arguments = (ins
+    SymbolRefAttr:$name
+  );
+
+  let assemblyFormat = [{
+    $name attr-dict
+  }];
+
+  let hasVerifier = 1;
+
+  let extraClassDeclaration = [{
+    mlir::StringAttr getKernelName();
+    mlir::StringAttr getKernelModuleName();
+  }];
+}
+
 #endif // FORTRAN_DIALECT_CUF_CUF_OPS
diff --git a/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
index 83d468bafdfeb6..b2221199995d58 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
+++ b/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
@@ -14,6 +14,7 @@ add_flang_library(CUFDialect
   FIRDialect
   FIRDialectSupport
   MLIRIR
+  MLIRGPUDialect
   MLIRTargetLLVMIRExport
 
   LINK_COMPONENTS
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 7fb2dcf4af115c..9e3bbd1f9cbee9 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -15,6 +15,7 @@
 #include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
 #include "flang/Optimizer/Dialect/FIRAttr.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/BuiltinOps.h"
@@ -253,6 +254,42 @@ llvm::LogicalResult cuf::KernelOp::verify() {
   return mlir::success();
 }
 
+//===----------------------------------------------------------------------===//
+// RegisterKernelOp
+//===----------------------------------------------------------------------===//
+
+mlir::StringAttr cuf::RegisterKernelOp::getKernelModuleName() {
+  return getName().getRootReference();
+}
+
+mlir::StringAttr cuf::RegisterKernelOp::getKernelName() {
+  return getName().getLeafReference();
+}
+
+mlir::LogicalResult cuf::RegisterKernelOp::verify() {
+  if (getKernelName() == getKernelModuleName())
+    return emitOpError("expect a module and a kernel name");
+
+  auto mod = getOperation()->getParentOfType<mlir::ModuleOp>();
+  if (!mod)
+    return emitOpError("expect to be in a module");
+
+  mlir::SymbolTable symTab(mod);
+  auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(getKernelModuleName());
+  if (!gpuMod)
+    return emitOpError("gpu module not found");
+
+  mlir::SymbolTable gpuSymTab(gpuMod);
+  auto func = gpuSymTab.lookup<mlir::gpu::GPUFuncOp>(getKernelName());
+  if (!func)
+    return emitOpError("device function not found");
+
+  if (!func.isKernel())
+    return emitOpError("only kernel gpu.func can be registered");
+
+  return mlir::success();
+}
+
 // Tablegen operators
 
 #define GET_OP_CLASSES
diff --git a/flang/test/Fir/CUDA/cuda-register-func.fir b/flang/test/Fir/CUDA/cuda-register-func.fir
new file mode 100644
index 00000000000000..a428f68eb3bf42
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-register-func.fir
@@ -0,0 +1,20 @@
+// RUN: fir-opt %s | FileCheck %s
+
+module attributes {gpu.container_module} {
+  gpu.module @cuda_device_mod {
+    gpu.func @_QPsub_device1() kernel {
+      gpu.return
+    }
+    gpu.func @_QPsub_device2(%arg0: !fir.ref<f32>) kernel {
+      gpu.return
+    }
+  }
+  llvm.func internal @__cudaFortranConstructor() {
+    cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+    cuf.register_kernel @cuda_device_mod::@_QPsub_device2
+    llvm.return
+  }
+}
+
+// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device2
diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir
index e9aeaa281e2a85..a5747b8ee4a3b3 100644
--- a/flang/test/Fir/cuf-invalid.fir
+++ b/flang/test/Fir/cuf-invalid.fir
@@ -125,3 +125,53 @@ func.func @_QPsub1(%arg0: !fir.ref<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda
   cuf.data_transfer %20#0 to %11#0, %19 : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>
   return
 }
+
+// -----
+
+module attributes {gpu.container_module} {
+  gpu.module @cuda_device_mod {
+    gpu.func @_QPsub_device1() {
+      gpu.return
+    }
+  }
+  llvm.func internal @__cudaFortranConstructor() {
+    // expected-error@+1{{'cuf.register_kernel' op only kernel gpu.func can be registered}}
+    cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+    llvm.return
+  }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+  gpu.module @cuda_device_mod {
+    gpu.func @_QPsub_device1() {
+      gpu.return
+    }
+  }
+  llvm.func internal @__cudaFortranConstructor() {
+    // expected-error@+1{{'cuf.register_kernel' op device function not found}}
+    cuf.register_kernel @cuda_device_mod::@_QPsub_device2
+    llvm.return
+  }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+  llvm.func internal @__cudaFortranConstructor() {
+    // expected-error@+1{{'cuf.register_kernel' op gpu module not found}}
+    cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+    llvm.return
+  }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+  llvm.func internal @__cudaFortranConstructor() {
+    // expected-error@+1{{'cuf.register_kernel' op expect a module and a kernel name}}
+    cuf.register_kernel @_QPsub_device1
+    llvm.return
+  }
+}
diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp
index f75fba27c68f08..84a74770cf0303 100644
--- a/flang/tools/fir-opt/fir-opt.cpp
+++ b/flang/tools/fir-opt/fir-opt.cpp
@@ -42,6 +42,7 @@ int main(int argc, char **argv) {
 #endif
   DialectRegistry registry;
   fir::support::registerDialects(registry);
+  registry.insert<mlir::gpu::GPUDialect>();
   fir::support::addFIRExtensions(registry);
   return failed(MlirOptMain(argc, argv, "FIR modular optimizer driver\n",
       registry));



More information about the flang-commits mailing list