[flang-commits] [flang] Reland '[flang][cuda] Add cuf.register_kernel operation' (PR #112389)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Oct 15 09:18:59 PDT 2024
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/112389
The operation will be used in the CUF constructor to register the kernel functions. This allows delaying the registration until codegen, when the gpu.binary will be available.
Reland of #112268 with correct shared library build support.
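For reference, the registration is expected to appear inside the CUF constructor roughly as follows (a minimal sketch; the module and kernel names simply mirror the test added in this patch):

  llvm.func internal @__cudaFortranConstructor() {
    cuf.register_kernel @cuda_device_mod::@_QPsub_device1
    llvm.return
  }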
From 7d969d6085bed2e2021e76d27ee02a0aac420613 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 14 Oct 2024 14:31:05 -0700
Subject: [PATCH] [flang][cuda] Add cuf.register_kernel operation
The operation will be used in the CUF constructor to register
the kernel functions. This allows delaying the registration until
codegen, when the gpu.binary will be available.
---
.../flang/Optimizer/Dialect/CUF/CUFOps.td | 19 +++++++
.../lib/Optimizer/Dialect/CUF/CMakeLists.txt | 1 +
flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 37 ++++++++++++++
flang/test/Fir/CUDA/cuda-register-func.fir | 20 ++++++++
flang/test/Fir/cuf-invalid.fir | 50 +++++++++++++++++++
flang/tools/fir-opt/fir-opt.cpp | 1 +
6 files changed, 128 insertions(+)
create mode 100644 flang/test/Fir/CUDA/cuda-register-func.fir
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index f643674f1d5d6b..98d1ef529738c7 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -288,4 +288,23 @@ def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
let hasVerifier = 1;
}
+def cuf_RegisterKernelOp : cuf_Op<"register_kernel", []> {
+ let summary = "Register a CUDA kernel";
+
+ let arguments = (ins
+ SymbolRefAttr:$name
+ );
+
+ let assemblyFormat = [{
+ $name attr-dict
+ }];
+
+ let hasVerifier = 1;
+
+ let extraClassDeclaration = [{
+ mlir::StringAttr getKernelName();
+ mlir::StringAttr getKernelModuleName();
+ }];
+}
+
#endif // FORTRAN_DIALECT_CUF_CUF_OPS
diff --git a/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
index 83d468bafdfeb6..b2221199995d58 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
+++ b/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
@@ -14,6 +14,7 @@ add_flang_library(CUFDialect
FIRDialect
FIRDialectSupport
MLIRIR
+ MLIRGPUDialect
MLIRTargetLLVMIRExport
LINK_COMPONENTS
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 7fb2dcf4af115c..9e3bbd1f9cbee9 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -15,6 +15,7 @@
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRType.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
@@ -253,6 +254,42 @@ llvm::LogicalResult cuf::KernelOp::verify() {
return mlir::success();
}
+//===----------------------------------------------------------------------===//
+// RegisterKernelOp
+//===----------------------------------------------------------------------===//
+
+mlir::StringAttr cuf::RegisterKernelOp::getKernelModuleName() {
+ return getName().getRootReference();
+}
+
+mlir::StringAttr cuf::RegisterKernelOp::getKernelName() {
+ return getName().getLeafReference();
+}
+
+mlir::LogicalResult cuf::RegisterKernelOp::verify() {
+ if (getKernelName() == getKernelModuleName())
+ return emitOpError("expect a module and a kernel name");
+
+ auto mod = getOperation()->getParentOfType<mlir::ModuleOp>();
+ if (!mod)
+ return emitOpError("expect to be in a module");
+
+ mlir::SymbolTable symTab(mod);
+ auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(getKernelModuleName());
+ if (!gpuMod)
+ return emitOpError("gpu module not found");
+
+ mlir::SymbolTable gpuSymTab(gpuMod);
+ auto func = gpuSymTab.lookup<mlir::gpu::GPUFuncOp>(getKernelName());
+ if (!func)
+ return emitOpError("device function not found");
+
+ if (!func.isKernel())
+ return emitOpError("only kernel gpu.func can be registered");
+
+ return mlir::success();
+}
+
// Tablegen operators
#define GET_OP_CLASSES
diff --git a/flang/test/Fir/CUDA/cuda-register-func.fir b/flang/test/Fir/CUDA/cuda-register-func.fir
new file mode 100644
index 00000000000000..a428f68eb3bf42
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-register-func.fir
@@ -0,0 +1,20 @@
+// RUN: fir-opt %s | FileCheck %s
+
+module attributes {gpu.container_module} {
+ gpu.module @cuda_device_mod {
+ gpu.func @_QPsub_device1() kernel {
+ gpu.return
+ }
+ gpu.func @_QPsub_device2(%arg0: !fir.ref<f32>) kernel {
+ gpu.return
+ }
+ }
+ llvm.func internal @__cudaFortranConstructor() {
+ cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+ cuf.register_kernel @cuda_device_mod::@_QPsub_device2
+ llvm.return
+ }
+}
+
+// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device2
diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir
index e9aeaa281e2a85..a5747b8ee4a3b3 100644
--- a/flang/test/Fir/cuf-invalid.fir
+++ b/flang/test/Fir/cuf-invalid.fir
@@ -125,3 +125,53 @@ func.func @_QPsub1(%arg0: !fir.ref<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda
cuf.data_transfer %20#0 to %11#0, %19 : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>
return
}
+
+// -----
+
+module attributes {gpu.container_module} {
+ gpu.module @cuda_device_mod {
+ gpu.func @_QPsub_device1() {
+ gpu.return
+ }
+ }
+ llvm.func internal @__cudaFortranConstructor() {
+ // expected-error@+1{{'cuf.register_kernel' op only kernel gpu.func can be registered}}
+ cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+ llvm.return
+ }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+ gpu.module @cuda_device_mod {
+ gpu.func @_QPsub_device1() {
+ gpu.return
+ }
+ }
+ llvm.func internal @__cudaFortranConstructor() {
+ // expected-error@+1{{'cuf.register_kernel' op device function not found}}
+ cuf.register_kernel @cuda_device_mod::@_QPsub_device2
+ llvm.return
+ }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+ llvm.func internal @__cudaFortranConstructor() {
+ // expected-error@+1{{'cuf.register_kernel' op gpu module not found}}
+ cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+ llvm.return
+ }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+ llvm.func internal @__cudaFortranConstructor() {
+ // expected-error@+1{{'cuf.register_kernel' op expect a module and a kernel name}}
+ cuf.register_kernel @_QPsub_device1
+ llvm.return
+ }
+}
diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp
index f75fba27c68f08..84a74770cf0303 100644
--- a/flang/tools/fir-opt/fir-opt.cpp
+++ b/flang/tools/fir-opt/fir-opt.cpp
@@ -42,6 +42,7 @@ int main(int argc, char **argv) {
#endif
DialectRegistry registry;
fir::support::registerDialects(registry);
+ registry.insert<mlir::gpu::GPUDialect>();
fir::support::addFIRExtensions(registry);
return failed(MlirOptMain(argc, argv, "FIR modular optimizer driver\n",
registry));