[flang-commits] [flang] 9ee4fdf - [flang][cuda] Introduce stream cast op (#136050)
Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-04-17T07:25:48-07:00
New Revision: 9ee4fdf4990f7dbdc0cfc30a8f733f676dbdcb51
URL: https://github.com/llvm/llvm-project/commit/9ee4fdf4990f7dbdc0cfc30a8f733f676dbdcb51
DIFF: https://github.com/llvm/llvm-project/commit/9ee4fdf4990f7dbdc0cfc30a8f733f676dbdcb51.diff
LOG: [flang][cuda] Introduce stream cast op (#136050)
Cast a stream object reference to a GPU async token. This is useful for
connecting the stream representation of CUDA Fortran with the async
mechanism of the GPU dialect.
This op will later be lowered to a no-op.
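In IR terms, the op consumes a reference to the 64-bit stream handle and
produces a !gpu.async.token that GPU dialect operations can take as an async
dependency. A minimal sketch of the intended use (module, kernel, and value
names are illustrative, mirroring the test added below):

    %stream = fir.alloca i64
    %c1 = arith.constant 1 : index
    %token = cuf.stream_cast %stream : !fir.ref<i64>
    gpu.launch_func [%token] @dev_mod::@kernel blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1) args()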
Added:
flang/test/Fir/CUDA/cuda-stream.mlir
Modified:
flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
flang/include/flang/Optimizer/Support/InitFIR.h
flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
flang/tools/fir-opt/fir-opt.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index feef5485194f8..f55f3e8a4466d 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -18,6 +18,7 @@ include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
include "flang/Optimizer/Dialect/FIRTypes.td"
include "flang/Optimizer/Dialect/FIRAttr.td"
+include "mlir/Dialect/GPU/IR/GPUBase.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/IR/BuiltinAttributes.td"
@@ -370,4 +371,25 @@ def cuf_SharedMemoryOp
CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
}
+def cuf_StreamCastOp : cuf_Op<"stream_cast", [NoMemoryEffect]> {
+  let summary = "Adapt a stream value to a GPU async token";
+
+  let description = [{
+    Cast a stream object reference to a GPU async token. This is useful for
+    connecting the stream representation of CUDA Fortran with the async
+    mechanism of the GPU dialect.
+    Later in the lowering this will become a no-op.
+  }];
+
+  let arguments = (ins fir_ReferenceType:$stream);
+
+  let results = (outs GPU_AsyncToken:$token);
+
+  let assemblyFormat = [{
+    $stream attr-dict `:` type($stream)
+  }];
+
+  let hasVerifier = 1;
+}
+
#endif // FORTRAN_DIALECT_CUF_CUF_OPS
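Note that the declared assemblyFormat prints only the type of the stream
operand; the !gpu.async.token result type is implied. A conforming textual
form would be (names illustrative):

    %token = cuf.stream_cast %stream : !fir.ref<i64>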
diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
index 4c57e01c28c93..1868fbb201970 100644
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -40,7 +40,7 @@ namespace fir::support {
mlir::cf::ControlFlowDialect, mlir::func::FuncDialect, \
mlir::vector::VectorDialect, mlir::math::MathDialect, \
mlir::complex::ComplexDialect, mlir::DLTIDialect, cuf::CUFDialect, \
- mlir::NVVM::NVVMDialect
+ mlir::NVVM::NVVMDialect, mlir::gpu::GPUDialect
#define FLANG_CODEGEN_DIALECT_LIST FIRCodeGenDialect, mlir::LLVM::LLVMDialect
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 957e4c01fb4a1..ce197d48d4860 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -319,6 +319,17 @@ void cuf::SharedMemoryOp::build(
   result.addAttributes(attributes);
}
+//===----------------------------------------------------------------------===//
+// StreamCastOp
+//===----------------------------------------------------------------------===//
+
+llvm::LogicalResult cuf::StreamCastOp::verify() {
+  auto refTy = mlir::dyn_cast<fir::ReferenceType>(getStream().getType());
+  if (!refTy || !refTy.getEleTy().isInteger(64))
+    return emitOpError("stream is expected to be an i64 reference");
+  return mlir::success();
+}
+
// Tablegen operators
#define GET_OP_CLASSES
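The verifier accepts only a reference to an i64, the representation CUDA
Fortran uses for a stream handle. IR such as the following (illustrative)
would therefore be rejected:

    // error: 'cuf.stream_cast' op stream is expected to be an i64 reference
    %bad = cuf.stream_cast %0 : !fir.ref<i32>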
diff --git a/flang/test/Fir/CUDA/cuda-stream.mlir b/flang/test/Fir/CUDA/cuda-stream.mlir
new file mode 100644
index 0000000000000..50f230467854b
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-stream.mlir
@@ -0,0 +1,21 @@
+// RUN: fir-opt --split-input-file %s | FileCheck %s
+
+module attributes {gpu.container_module} {
+  gpu.module @cuda_device_mod {
+    gpu.func @_QMmod1Psub1() kernel {
+      gpu.return
+    }
+  }
+  func.func @_QMmod1Phost_sub() {
+    %0 = fir.alloca i64
+    %1 = arith.constant 1 : index
+    %asyncTok = cuf.stream_cast %0 : !fir.ref<i64>
+    gpu.launch_func [%asyncTok] @cuda_device_mod::@_QMmod1Psub1 blocks in (%1, %1, %1) threads in (%1, %1, %1) args() {cuf.proc_attr = #cuf.cuda_proc<grid_global>}
+    return
+  }
+}
+
+// CHECK-LABEL: func.func @_QMmod1Phost_sub()
+// CHECK: %[[STREAM:.*]] = fir.alloca i64
+// CHECK: %[[TOKEN:.*]] = cuf.stream_cast %[[STREAM]] : !fir.ref<i64>
+// CHECK: gpu.launch_func [%[[TOKEN]]] @cuda_device_mod::@_QMmod1Psub1
diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp
index ef510ff77ad25..d66fc3f08bdf8 100644
--- a/flang/tools/fir-opt/fir-opt.cpp
+++ b/flang/tools/fir-opt/fir-opt.cpp
@@ -44,8 +44,6 @@ int main(int argc, char **argv) {
 #endif
   DialectRegistry registry;
   fir::support::registerDialects(registry);
-  registry.insert<mlir::gpu::GPUDialect>();
-  registry.insert<mlir::NVVM::NVVMDialect>();
   fir::support::addFIRExtensions(registry);
   return failed(MlirOptMain(argc, argv, "FIR modular optimizer driver\n",
                             registry));