[flang-commits] [flang] e826168 - [flang][cuda] Add CUFLaunchAttachAttr pass (#174465)
via flang-commits
flang-commits at lists.llvm.org
Mon Jan 5 13:05:39 PST 2026
Author: Valentin Clement (バレンタイン クレメン)
Date: 2026-01-05T13:05:34-08:00
New Revision: e826168a24479c7613f098daa19248d4b494ee96
URL: https://github.com/llvm/llvm-project/commit/e826168a24479c7613f098daa19248d4b494ee96
DIFF: https://github.com/llvm/llvm-project/commit/e826168a24479c7613f098daa19248d4b494ee96.diff
LOG: [flang][cuda] Add CUFLaunchAttachAttr pass (#174465)
CUF kernels are generated via gpu.launch and then outlined. The resulting
launch operation needs to have a CUDA attribute attached so it will
be distinguishable from other launches.
Added:
flang/lib/Optimizer/Transforms/CUDA/CUFLaunchAttachAttr.cpp
flang/test/Fir/CUDA/cuda-launch-attach-attr.mlir
Modified:
flang/include/flang/Optimizer/Transforms/Passes.h
flang/include/flang/Optimizer/Transforms/Passes.td
flang/lib/Optimizer/Transforms/CMakeLists.txt
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 4dcdddaac8ee5..50e8e6c58bf62 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -9,6 +9,7 @@
#ifndef FORTRAN_OPTIMIZER_TRANSFORMS_PASSES_H
#define FORTRAN_OPTIMIZER_TRANSFORMS_PASSES_H
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index d457cbbb32798..a2a3341bfa667 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -523,6 +523,16 @@ def CUFComputeSharedMemoryOffsetsAndSize
];
}
+def CUFLaunchAttachAttr : Pass<"cuf-launch-attach-attr", ""> {
+  let summary = "Attach CUDA attribute to CUF kernel generated launch";
+  let description = [{
+    CUF kernels are generated via gpu.launch and then outlined. The resulting
+    launch operation needs to have a CUDA attribute attached so that it can
+    be distinguished from other launches.
+  }];
+  let dependentDialects = ["cuf::CUFDialect"];
+}
+
def CUFPredefinedVarToGPU
: Pass<"cuf-predefined-var-to-gpu", "::mlir::func::FuncOp"> {
let summary = "Transform predefined variables to GPU operations";
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index c07921b23f3ff..1e2d6e8f15e92 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -10,6 +10,7 @@ add_flang_library(FIRTransforms
ConstantArgumentGlobalisation.cpp
ControlFlowConverter.cpp
CUDA/CUFAllocationConversion.cpp
+ CUDA/CUFLaunchAttachAttr.cpp
CUDA/CUFPredefinedVarToGPU.cpp
CUFAddConstructor.cpp
CUFDeviceGlobal.cpp
diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFLaunchAttachAttr.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFLaunchAttachAttr.cpp
new file mode 100644
index 0000000000000..41a0e5c7dceec
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFLaunchAttachAttr.cpp
@@ -0,0 +1,70 @@
+//===-- CUFLaunchAttachAttr.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+namespace fir {
+#define GEN_PASS_DEF_CUFLAUNCHATTACHATTR
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+using namespace mlir;
+
+namespace {
+
+// Substring searched for in the kernel name of a gpu.launch_func to decide
+// whether the launch needs the CUF procedure attribute (presumably the infix
+// used when naming outlined CUF kernels -- confirm against the outlining code).
+static constexpr llvm::StringRef cudaKernelInfix = "_cufk_";
+
+/// Rewrite pattern that tags a gpu.launch_func with the CUF `global` procedure
+/// attribute. The pattern itself matches every gpu.launch_func; deciding which
+/// launches actually need the attribute is left to the legality callback of
+/// the conversion target that drives this pattern.
+class CUFGPUAttachAttrPattern
+    : public OpRewritePattern<mlir::gpu::LaunchFuncOp> {
+public:
+  using OpRewritePattern<mlir::gpu::LaunchFuncOp>::OpRewritePattern;
+
+  /// Attach `cuf.proc_attr = #cuf.cuda_proc<global>` to \p op in place.
+  /// Always succeeds.
+  LogicalResult matchAndRewrite(mlir::gpu::LaunchFuncOp op,
+                                PatternRewriter &rewriter) const override {
+    // Go through the rewriter so the conversion driver is notified of the
+    // in-place update instead of having the operation mutated behind its back.
+    rewriter.modifyOpInPlace(op, [&]() {
+      op->setAttr(cuf::getProcAttrName(),
+                  cuf::ProcAttributeAttr::get(op.getContext(),
+                                              cuf::ProcAttribute::Global));
+    });
+    return mlir::success();
+  }
+};
+
+/// Pass attaching the CUF `global` procedure attribute to every
+/// gpu.launch_func whose kernel name contains `cudaKernelInfix` and that does
+/// not carry the attribute yet. All other launches are left untouched.
+struct CUFLaunchAttachAttr
+    : public fir::impl::CUFLaunchAttachAttrBase<CUFLaunchAttachAttr> {
+
+  void runOnOperation() override {
+    auto *context = &this->getContext();
+
+    mlir::RewritePatternSet patterns(context);
+    patterns.add<CUFGPUAttachAttrPattern>(context);
+
+    // A launch is legal unless it targets a kernel whose name contains the
+    // CUF infix and it is still missing the attribute. Registering the dynamic
+    // legality callback is sufficient: a preceding addIllegalOp for the same
+    // operation would simply be overridden by it.
+    mlir::ConversionTarget target(*context);
+    target.addDynamicallyLegalOp<mlir::gpu::LaunchFuncOp>(
+        [](mlir::gpu::LaunchFuncOp op) -> bool {
+          if (!op.getKernelName().getValue().contains(cudaKernelInfix))
+            return true;
+          return static_cast<bool>(op->getAttrOfType<cuf::ProcAttributeAttr>(
+              cuf::getProcAttrName()));
+        });
+
+    if (mlir::failed(mlir::applyPartialConversion(this->getOperation(), target,
+                                                  std::move(patterns)))) {
+      mlir::emitError(mlir::UnknownLoc::get(context),
+                      "Pattern conversion failed\n");
+      this->signalPassFailure();
+    }
+  }
+};
+
+} // end anonymous namespace
diff --git a/flang/test/Fir/CUDA/cuda-launch-attach-attr.mlir b/flang/test/Fir/CUDA/cuda-launch-attach-attr.mlir
new file mode 100644
index 0000000000000..efaea1d9ba63b
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-launch-attach-attr.mlir
@@ -0,0 +1,19 @@
+// RUN: fir-opt --split-input-file --cuf-launch-attach-attr %s | FileCheck %s
+
+module attributes {gpu.container_module} {
+ func.func @_QQmain() attributes {fir.bindc_name = "test"} {
+ %0 = arith.constant 1 : i64
+ %1 = arith.constant 2 : i64
+ %3 = arith.constant 10 : i64
+ // Kernel name contains "_cufk_" and the launch has no attribute yet: the pass is expected to add cuf.proc_attr.
+ gpu.launch_func @cuda_device_mod::@_QMtest_cufk_20 blocks in (%3, %3, %0) threads in (%3, %3, %0) : i64
+ // Kernel name does not contain "_cufk_": the launch is expected to stay unchanged.
+ gpu.launch_func @cuda_device_mod::@_QMtest2 blocks in (%3, %3, %0) threads in (%3, %3, %0) : i64
+ // Kernel name contains "_cufk_" and the attribute is already present: the launch is expected to be kept as is.
+ gpu.launch_func @cuda_device_mod::@_QMtest_cufk_22 blocks in (%3, %3, %0) threads in (%3, %3, %0) : i64 {cuf.proc_attr = #cuf.cuda_proc<global>}
+ return
+ }
+ gpu.binary @cuda_device_mod [#gpu.object<#nvvm.target, "">]
+}
+
+// CHECK-LABEL: func.func @_QQmain()
+// CHECK: gpu.launch_func @cuda_device_mod::@_QMtest_cufk_20 blocks in ({{.*}}) threads in ({{.*}}) : i64 {cuf.proc_attr = #cuf.cuda_proc<global>}
+// CHECK: gpu.launch_func @cuda_device_mod::@_QMtest2 blocks in ({{.*}}) threads in ({{.*}}) : i64 {{$}}
+// CHECK: gpu.launch_func @cuda_device_mod::@_QMtest_cufk_22 blocks in ({{.*}}) threads in ({{.*}}) : i64 {cuf.proc_attr = #cuf.cuda_proc<global>}
More information about the flang-commits
mailing list