[flang-commits] [flang] [flang][cuda] Move CUDA Fortran operations to a CUF dialect (PR #92317)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Wed May 15 14:13:14 PDT 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/92317
>From b64a51ff988b16594756498ddbf3f04446445d27 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 14 May 2024 10:08:13 -0700
Subject: [PATCH 1/2] [flang][cuda] Move CUDA Fortran operations to a CUF
dialect
---
flang/include/flang/Lower/ConvertVariable.h | 10 +-
.../flang/Optimizer/Builder/FIRBuilder.h | 4 +-
.../flang/Optimizer/Builder/HLFIRTools.h | 2 +-
.../flang/Optimizer/Dialect/CMakeLists.txt | 2 +
.../Optimizer/Dialect/CUF/CMakeLists.txt | 11 +
.../flang/Optimizer/Dialect/CUF/CUFDialect.h | 26 ++
.../flang/Optimizer/Dialect/CUF/CUFDialect.td | 43 +++
.../flang/Optimizer/Dialect/CUF/CUFOps.h | 20 ++
.../flang/Optimizer/Dialect/CUF/CUFOps.td | 263 ++++++++++++++++++
.../flang/Optimizer/Dialect/FIRAttr.td | 83 ------
.../include/flang/Optimizer/Dialect/FIROps.h | 1 +
.../include/flang/Optimizer/Dialect/FIROps.td | 242 +---------------
.../flang/Optimizer/Dialect/FIROpsSupport.h | 13 -
.../include/flang/Optimizer/HLFIR/HLFIROps.td | 5 +-
.../include/flang/Optimizer/Support/InitFIR.h | 3 +-
flang/include/flang/Optimizer/Support/Utils.h | 61 +---
flang/lib/Frontend/CMakeLists.txt | 2 +
flang/lib/Lower/Allocatable.cpp | 17 +-
flang/lib/Lower/Bridge.cpp | 52 ++--
flang/lib/Lower/CMakeLists.txt | 4 +
flang/lib/Lower/CallInterface.cpp | 21 +-
flang/lib/Lower/ConvertCall.cpp | 3 +-
flang/lib/Lower/ConvertVariable.cpp | 57 ++--
flang/lib/Optimizer/Builder/FIRBuilder.cpp | 8 +-
flang/lib/Optimizer/Builder/HLFIRTools.cpp | 4 +-
flang/lib/Optimizer/Dialect/CMakeLists.txt | 3 +
.../lib/Optimizer/Dialect/CUF/CMakeLists.txt | 22 ++
.../lib/Optimizer/Dialect/CUF/CUFDialect.cpp | 25 ++
flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 219 +++++++++++++++
flang/lib/Optimizer/Dialect/FIRAttr.cpp | 4 +-
flang/lib/Optimizer/Dialect/FIRDialect.cpp | 1 +
flang/lib/Optimizer/Dialect/FIROps.cpp | 163 -----------
flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt | 2 +
flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp | 4 +-
.../Optimizer/HLFIR/Transforms/CMakeLists.txt | 2 +
.../HLFIR/Transforms/ConvertToFIR.cpp | 8 +-
flang/test/Fir/cuf-invalid.fir | 50 ++--
flang/test/Fir/cuf.mlir | 46 +--
flang/test/Lower/CUDA/cuda-allocatable.cuf | 48 ++--
flang/test/Lower/CUDA/cuda-data-attribute.cuf | 60 ++--
flang/test/Lower/CUDA/cuda-data-transfer.cuf | 50 ++--
flang/test/Lower/CUDA/cuda-kernel-calls.cuf | 12 +-
.../Lower/CUDA/cuda-kernel-loop-directive.cuf | 10 +-
flang/test/Lower/CUDA/cuda-mod.cuf | 4 +-
flang/test/Lower/CUDA/cuda-module-use.cuf | 8 +-
flang/test/Lower/CUDA/cuda-proc-attribute.cuf | 26 +-
flang/tools/bbc/CMakeLists.txt | 2 +
flang/tools/fir-opt/CMakeLists.txt | 2 +
flang/tools/tco/CMakeLists.txt | 2 +
flang/unittests/Optimizer/CMakeLists.txt | 2 +
.../Optimizer/FortranVariableTest.cpp | 8 +-
51 files changed, 923 insertions(+), 817 deletions(-)
create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt
create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h
create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td
create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h
create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
create mode 100644 flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
create mode 100644 flang/lib/Optimizer/Dialect/CUF/CUFDialect.cpp
create mode 100644 flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h
index d70d3268acac1..515f4695951b4 100644
--- a/flang/include/flang/Lower/ConvertVariable.h
+++ b/flang/include/flang/Lower/ConvertVariable.h
@@ -23,6 +23,10 @@
#include "mlir/IR/Value.h"
#include "llvm/ADT/DenseMap.h"
+namespace cuf {
+class DataAttributeAttr;
+}
+
namespace fir {
class ExtendedValue;
class FirOpBuilder;
@@ -146,9 +150,9 @@ translateSymbolAttributes(mlir::MLIRContext *mlirContext,
/// Translate the CUDA Fortran attributes of \p sym into the FIR CUDA attribute
/// representation.
-fir::CUDADataAttributeAttr
-translateSymbolCUDADataAttribute(mlir::MLIRContext *mlirContext,
- const Fortran::semantics::Symbol &sym);
+cuf::DataAttributeAttr
+translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
+ const Fortran::semantics::Symbol &sym);
/// Map a symbol to a given fir::ExtendedValue. This will generate an
/// hlfir.declare when lowering to HLFIR and map the hlfir.declare result to the
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index 0d650f830b64e..287730ef2ac85 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -254,13 +254,13 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener {
mlir::StringAttr linkage = {},
mlir::Attribute value = {}, bool isConst = false,
bool isTarget = false,
- fir::CUDADataAttributeAttr cudaAttr = {});
+ cuf::DataAttributeAttr dataAttr = {});
fir::GlobalOp createGlobal(mlir::Location loc, mlir::Type type,
llvm::StringRef name, bool isConst, bool isTarget,
std::function<void(FirOpBuilder &)> bodyBuilder,
mlir::StringAttr linkage = {},
- fir::CUDADataAttributeAttr cudaAttr = {});
+ cuf::DataAttributeAttr dataAttr = {});
/// Create a global constant (read-only) value.
fir::GlobalOp createGlobalConstant(mlir::Location loc, mlir::Type type,
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6cc8e71b3b18d..43aa1661550ec 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -239,7 +239,7 @@ genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
const fir::ExtendedValue &exv, llvm::StringRef name,
fir::FortranVariableFlagsAttr flags,
mlir::Value dummyScope = nullptr,
- fir::CUDADataAttributeAttr cudaAttr = {});
+ cuf::DataAttributeAttr dataAttr = {});
/// Generate an hlfir.associate to build a variable from an expression value.
/// The type of the variable must be provided so that scalar logicals are
diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
index f00993d4d3778..301a93c1fe5b4 100644
--- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory(CUF)
+
# This replicates part of the add_mlir_dialect cmake function from MLIR that
# cannot be used her because it expects to be run inside MLIR directory which
# is not the case for FIR.
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt
new file mode 100644
index 0000000000000..07490c7b9ca2c
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_subdirectory(Attributes)
+
+set(LLVM_TARGET_DEFINITIONS CUFDialect.td)
+mlir_tablegen(CUFDialect.h.inc -gen-dialect-decls -dialect=cuf)
+mlir_tablegen(CUFDialect.cpp.inc -gen-dialect-defs -dialect=cuf)
+
+set(LLVM_TARGET_DEFINITIONS CUFOps.td)
+mlir_tablegen(CUFOps.h.inc -gen-op-decls)
+mlir_tablegen(CUFOps.cpp.inc -gen-op-defs)
+
+add_public_tablegen_target(CUFOpsIncGen)
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h
new file mode 100644
index 0000000000000..cf562b2268355
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h
@@ -0,0 +1,26 @@
+//===-- Optimizer/Dialect/CUFDialect.h -- CUF dialect -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H
+#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H
+
+#include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/SymbolTable.h"
+#include "mlir/Interfaces/CallInterfaces.h"
+#include "mlir/Interfaces/FunctionInterfaces.h"
+#include "mlir/Interfaces/LoopLikeInterface.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.h.inc"
+
+#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td
new file mode 100644
index 0000000000000..df866e5664068
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td
@@ -0,0 +1,43 @@
+//===-- CUFDialect.td - CUF dialect base definitions -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of the CUDA Fortran dialect
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_DIALECT_CUF_CUFDIALECT
+#define FORTRAN_DIALECT_CUF_CUFDIALECT
+
+include "mlir/IR/AttrTypeBase.td"
+include "mlir/IR/EnumAttr.td"
+include "mlir/IR/OpBase.td"
+
+def CUFDialect : Dialect {
+ let name = "cuf";
+
+ let summary = "CUDA Fortran dialect";
+
+ let description = [{
+ This dialect models CUDA Fortran operations. The CUF dialect operations use
+ the FIR types and are tightly coupled with FIR and HLFIR.
+ }];
+
+ let useDefaultAttributePrinterParser = 1;
+ let usePropertiesForAttributes = 1;
+ let cppNamespace = "::cuf";
+ let dependentDialects = ["fir::FIROpsDialect"];
+
+ let extraClassDeclaration = [{
+ private:
+ // Register the CUF Attributes.
+ void registerAttributes();
+ }];
+}
+
+#endif // FORTRAN_DIALECT_CUF_CUFDIALECT
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h
new file mode 100644
index 0000000000000..4132db672e394
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h
@@ -0,0 +1,20 @@
+//===-- Optimizer/Dialect/CUF/CUFOps.h - CUF operations ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H
+#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H
+
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "mlir/IR/OpDefinition.h"
+
+#define GET_OP_CLASSES
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h.inc"
+
+#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
new file mode 100644
index 0000000000000..6ec2693077282
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -0,0 +1,263 @@
+//===-- CUFOps.td - CUF operation definitions --------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of the CUF dialect operations
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_DIALECT_CUF_CUF_OPS
+#define FORTRAN_DIALECT_CUF_CUF_OPS
+
+include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
+include "flang/Optimizer/Dialect/CUF/CUFAttr.td"
+include "flang/Optimizer/Dialect/FIRTypes.td"
+include "mlir/Interfaces/LoopLikeInterface.td"
+include "mlir/IR/BuiltinAttributes.td"
+
+class cuf_Op<string mnemonic, list<Trait> traits>
+ : Op<CUFDialect, mnemonic, traits>;
+
+def cuf_AllocOp : cuf_Op<"alloc", [AttrSizedOperandSegments,
+ MemoryEffects<[MemAlloc]>]> {
+ let summary = "Allocate an object on device";
+
+ let description = [{
+ This is a drop in replacement for fir.alloca and fir.allocmem for device
+ object. Any device, managed or unified object declared in an host
+ subprogram needs to be allocated in the device memory through runtime calls.
+ The cuf.alloc is an abstraction to the runtime calls and works together
+ with cuf.free.
+ }];
+
+ let arguments = (ins
+ TypeAttr:$in_type,
+ OptionalAttr<StrAttr>:$uniq_name,
+ OptionalAttr<StrAttr>:$bindc_name,
+ Variadic<AnyIntegerType>:$typeparams,
+ Variadic<AnyIntegerType>:$shape,
+ cuf_DataAttributeAttr:$data_attr
+ );
+
+ let results = (outs fir_ReferenceType:$ptr);
+
+ let assemblyFormat = [{
+ $in_type (`(` $typeparams^ `:` type($typeparams) `)`)?
+ (`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr))
+ }];
+
+ let builders = [
+ OpBuilder<(ins "mlir::Type":$inType, "llvm::StringRef":$uniqName,
+ "llvm::StringRef":$bindcName,
+ "cuf::DataAttributeAttr":$cudaAttr,
+ CArg<"mlir::ValueRange", "{}">:$typeparams,
+ CArg<"mlir::ValueRange", "{}">:$shape,
+ CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
+
+ let hasVerifier = 1;
+}
+
+def cuf_FreeOp : cuf_Op<"free", [MemoryEffects<[MemFree]>]> {
+ let summary = "Free a device allocated object";
+
+ let description = [{
+ The cuf.free operation frees the memory allocated by cuf.alloc.
+ This is used for non-allocatable device, managed and unified device
+ variables declare in host subprogram.
+ }];
+
+ let arguments = (ins
+ Arg<AnyReferenceLike, "", [MemFree]>:$devptr,
+ cuf_DataAttributeAttr:$data_attr
+ );
+
+ let assemblyFormat = "$devptr `:` qualified(type($devptr)) attr-dict";
+
+ let hasVerifier = 1;
+}
+
+def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments,
+ MemoryEffects<[MemAlloc<DefaultResource>]>]> {
+ let summary = "Perform the device allocation of data of an allocatable";
+
+ let description = [{
+ The cuf.allocate operation performs the allocation on the device
+ of the data of an allocatable. The descriptor passed to the operation
+ is initialized before with the standard flang runtime calls.
+ }];
+
+ let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
+ Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
+ Optional<AnyIntegerType>:$stream,
+ Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
+ Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
+ cuf_DataAttributeAttr:$data_attr,
+ UnitAttr:$hasStat);
+
+ let results = (outs AnyIntegerType:$stat);
+
+ let assemblyFormat = [{
+ $box `:` qualified(type($box))
+ ( `source` `(` $source^ `:` qualified(type($source) )`)` )?
+ ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
+ ( `stream` `(` $stream^ `:` type($stream) `)` )?
+ ( `pinned` `(` $pinned^ `:` type($pinned) `)` )?
+ attr-dict `->` type($stat)
+ }];
+
+ let hasVerifier = 1;
+}
+
+def cuf_DeallocateOp : cuf_Op<"deallocate",
+ [MemoryEffects<[MemFree<DefaultResource>]>]> {
+ let summary = "Perform the device deallocation of data of an allocatable";
+
+ let description = [{
+ The cuf.deallocate operation performs the deallocation on the device
+ of the data of an allocatable.
+ }];
+
+ let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
+ Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
+ cuf_DataAttributeAttr:$data_attr,
+ UnitAttr:$hasStat);
+
+ let results = (outs AnyIntegerType:$stat);
+
+ let assemblyFormat = [{
+ $box `:` qualified(type($box))
+ ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
+ attr-dict `->` type($stat)
+ }];
+
+ let hasVerifier = 1;
+}
+
+def cuf_DataTransferOp : cuf_Op<"data_transfer", []> {
+ let summary = "Represent a data transfer between host and device memory";
+
+ let description = [{
+ CUDA Fortran allows data transfer to be done via intrinsic assignment
+ between a host and a device variable. This operation is used to materialized
+ the data transfer between the lhs and rhs memory references.
+ The kind of transfer is specified in the attribute.
+
+ ```
+ adev = a ! transfer host to device
+ a = adev ! transfer device to host
+ bdev = adev ! transfer device to device
+ ```
+ }];
+
+ let arguments = (ins Arg<AnyReferenceLike, "", [MemWrite]>:$src,
+ Arg<AnyReferenceLike, "", [MemRead]>:$dst,
+ cuf_DataTransferKindAttr:$transfer_kind);
+
+ let assemblyFormat = [{
+ $src `to` $dst attr-dict `:` type(operands)
+ }];
+}
+
+def cuf_KernelLaunchOp : cuf_Op<"kernel_launch", [CallOpInterface,
+ AttrSizedOperandSegments]> {
+ let summary = "call CUDA kernel";
+
+ let description = [{
+ Launch a CUDA kernel from the host.
+
+ ```
+ // launch simple kernel with no arguments. bytes and stream value are
+ // optional in the chevron notation.
+ cuf.kernel_launch @kernel<<<%gx, %gy, %bx, %by, %bz>>>()
+ ```
+ }];
+
+ let arguments = (ins
+ SymbolRefAttr:$callee,
+ I32:$grid_x,
+ I32:$grid_y,
+ I32:$grid_z,
+ I32:$block_x,
+ I32:$block_y,
+ I32:$block_z,
+ Optional<I32>:$bytes,
+ Optional<I32>:$stream,
+ Variadic<AnyType>:$args
+ );
+
+ let assemblyFormat = [{
+ $callee `<` `<` `<` $grid_x `,` $grid_y `,` $grid_z `,`$block_x `,`
+ $block_y `,` $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>`
+ `` `(` $args `)` ( `:` `(` type($args)^ `)` )? attr-dict
+ }];
+
+ let extraClassDeclaration = [{
+ mlir::CallInterfaceCallable getCallableForCallee() {
+ return getCalleeAttr();
+ }
+
+ void setCalleeFromCallable(mlir::CallInterfaceCallable callee) {
+ (*this)->setAttr(getCalleeAttrName(), callee.get<mlir::SymbolRefAttr>());
+ }
+ mlir::FunctionType getFunctionType();
+
+ unsigned getNbNoArgOperand() {
+ unsigned nbNoArgOperand = 5; // grids and blocks values are always present.
+ if (getBytes()) ++nbNoArgOperand;
+ if (getStream()) ++nbNoArgOperand;
+ return nbNoArgOperand;
+ }
+
+ operand_range getArgOperands() {
+ return {operand_begin() + getNbNoArgOperand(), operand_end()};
+ }
+ mlir::MutableOperandRange getArgOperandsMutable() {
+ return mlir::MutableOperandRange(
+ *this, getNbNoArgOperand(), getArgs().size() - 1);
+ }
+ }];
+}
+
+def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
+ DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
+
+ let description = [{
+ Represent the CUDA Fortran kernel directive. The operation is a loop like
+ operation that represents the iteration range of the embedded loop nest.
+
+ When grid or block variadic operands are empty, a `*` only syntax was used
+ in the Fortran code.
+ If the `*` is mixed with values for either grid or block, these are
+ represented by a 0 constant value.
+ }];
+
+ let arguments = (ins
+ Variadic<I32>:$grid, // empty means `*`
+ Variadic<I32>:$block, // empty means `*`
+ Optional<I32>:$stream,
+ Variadic<Index>:$lowerbound,
+ Variadic<Index>:$upperbound,
+ Variadic<Index>:$step,
+ OptionalAttr<I64Attr>:$n
+ );
+
+ let regions = (region AnyRegion:$region);
+
+ let assemblyFormat = [{
+ `<` `<` `<` custom<CUFKernelValues>($grid, type($grid)) `,`
+ custom<CUFKernelValues>($block, type($block))
+ ( `,` `stream` `=` $stream^ )? `>` `>` `>`
+ custom<CUFKernelLoopControl>($region, $lowerbound, type($lowerbound),
+ $upperbound, type($upperbound), $step, type($step))
+ attr-dict
+ }];
+
+ let hasVerifier = 1;
+}
+
+#endif // FORTRAN_DIALECT_CUF_CUF_OPS
diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
index f8b3fb861cc62..989319ff3ddaf 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
@@ -70,87 +70,4 @@ def fir_BoxFieldAttr : I32EnumAttr<
// mlir::SideEffects::Resource for modelling operations which add debugging information
def DebuggingResource : Resource<"::fir::DebuggingResource">;
-//===----------------------------------------------------------------------===//
-// CUDA Fortran specific attributes
-//===----------------------------------------------------------------------===//
-
-def fir_CUDADataAttribute : I32EnumAttr<
- "CUDADataAttribute",
- "CUDA Fortran variable attributes",
- [
- I32EnumAttrCase<"Constant", 0, "constant">,
- I32EnumAttrCase<"Device", 1, "device">,
- I32EnumAttrCase<"Managed", 2, "managed">,
- I32EnumAttrCase<"Pinned", 3, "pinned">,
- I32EnumAttrCase<"Shared", 4, "shared">,
- I32EnumAttrCase<"Unified", 5, "unified">,
- // Texture is omitted since it is obsolete and rejected by semantic.
- ]> {
- let genSpecializedAttr = 0;
- let cppNamespace = "::fir";
-}
-
-def fir_CUDADataAttributeAttr :
- EnumAttr<FIROpsDialect, fir_CUDADataAttribute, "cuda"> {
- let assemblyFormat = [{ ```<` $value `>` }];
-}
-
-def fir_CUDAProcAttribute : I32EnumAttr<
- "CUDAProcAttribute", "CUDA Fortran procedure attributes",
- [
- I32EnumAttrCase<"Host", 0, "host">,
- I32EnumAttrCase<"Device", 1, "device">,
- I32EnumAttrCase<"HostDevice", 2, "host_device">,
- I32EnumAttrCase<"Global", 3, "global">,
- I32EnumAttrCase<"GridGlobal", 4, "grid_global">,
- ]> {
- let genSpecializedAttr = 0;
- let cppNamespace = "::fir";
-}
-
-def fir_CUDAProcAttributeAttr :
- EnumAttr<FIROpsDialect, fir_CUDAProcAttribute, "cuda_proc"> {
- let assemblyFormat = [{ ```<` $value `>` }];
-}
-
-def fir_CUDALaunchBoundsAttr : fir_Attr<"CUDALaunchBounds"> {
- let mnemonic = "launch_bounds";
-
- let parameters = (ins
- "mlir::IntegerAttr":$maxTPB,
- "mlir::IntegerAttr":$minBPM,
- OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize
- );
-
- let assemblyFormat = "`<` struct(params) `>`";
-}
-
-def fir_CUDAClusterDimsAttr : fir_Attr<"CUDAClusterDims"> {
- let mnemonic = "cluster_dims";
-
- let parameters = (ins
- "mlir::IntegerAttr":$x,
- "mlir::IntegerAttr":$y,
- "mlir::IntegerAttr":$z
- );
-
- let assemblyFormat = "`<` struct(params) `>`";
-}
-
-def fir_CUDADataTransferKind : I32EnumAttr<
- "CUDADataTransferKind", "CUDA Fortran data transfer kind",
- [
- I32EnumAttrCase<"DeviceHost", 0, "device_host">,
- I32EnumAttrCase<"HostDevice", 1, "host_device">,
- I32EnumAttrCase<"DeviceDevice", 2, "device_device">,
- ]> {
- let genSpecializedAttr = 0;
- let cppNamespace = "::fir";
-}
-
-def fir_CUDADataTransferKindAttr :
- EnumAttr<FIROpsDialect, fir_CUDADataTransferKind, "cuda_transfer"> {
- let assemblyFormat = [{ ```<` $value `>` }];
-}
-
#endif // FIR_DIALECT_FIR_ATTRS
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h
index 016ad0433ed84..9f07364ddb627 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.h
@@ -9,6 +9,7 @@
#ifndef FORTRAN_OPTIMIZER_DIALECT_FIROPS_H
#define FORTRAN_OPTIMIZER_DIALECT_FIROPS_H
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/FirAliasTagOpInterface.h"
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 64c5e360b28f7..d9c1149040066 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -17,6 +17,7 @@
include "mlir/Dialect/Arith/IR/ArithBase.td"
include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
+include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
include "flang/Optimizer/Dialect/FIRDialect.td"
include "flang/Optimizer/Dialect/FIRTypes.td"
include "flang/Optimizer/Dialect/FIRAttr.td"
@@ -2436,66 +2437,6 @@ def fir_DispatchOp : fir_Op<"dispatch", []> {
}];
}
-def fir_CUDAKernelLaunch : fir_Op<"cuda_kernel_launch", [CallOpInterface,
- AttrSizedOperandSegments]> {
- let summary = "call CUDA kernel";
-
- let description = [{
- Launch a CUDA kernel from the host.
-
- ```
- // launch simple kernel with no arguments. bytes and stream value are
- // optional in the chevron notation.
- fir.cuda_kernel_launch @kernel<<<%gx, %gy, %bx, %by, %bz>>>()
- ```
- }];
-
- let arguments = (ins
- SymbolRefAttr:$callee,
- I32:$grid_x,
- I32:$grid_y,
- I32:$grid_z,
- I32:$block_x,
- I32:$block_y,
- I32:$block_z,
- Optional<I32>:$bytes,
- Optional<I32>:$stream,
- Variadic<AnyType>:$args
- );
-
- let assemblyFormat = [{
- $callee `<` `<` `<` $grid_x `,` $grid_y `,` $grid_z `,`$block_x `,`
- $block_y `,` $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>`
- `` `(` $args `)` ( `:` `(` type($args)^ `)` )? attr-dict
- }];
-
- let extraClassDeclaration = [{
- mlir::CallInterfaceCallable getCallableForCallee() {
- return getCalleeAttr();
- }
-
- void setCalleeFromCallable(mlir::CallInterfaceCallable callee) {
- (*this)->setAttr(getCalleeAttrName(), callee.get<mlir::SymbolRefAttr>());
- }
- mlir::FunctionType getFunctionType();
-
- unsigned getNbNoArgOperand() {
- unsigned nbNoArgOperand = 5; // grids and blocks values are always present.
- if (getBytes()) ++nbNoArgOperand;
- if (getStream()) ++nbNoArgOperand;
- return nbNoArgOperand;
- }
-
- operand_range getArgOperands() {
- return {operand_begin() + getNbNoArgOperand(), operand_end()};
- }
- mlir::MutableOperandRange getArgOperandsMutable() {
- return mlir::MutableOperandRange(
- *this, getNbNoArgOperand(), getArgs().size() - 1);
- }
- }];
-}
-
// Constant operations that support Fortran
def fir_StringLitOp : fir_Op<"string_lit", [NoMemoryEffect]> {
@@ -2797,7 +2738,7 @@ def fir_GlobalOp : fir_Op<"global", [IsolatedFromAbove, Symbol]> {
OptionalAttr<UnitAttr>:$constant,
OptionalAttr<UnitAttr>:$target,
OptionalAttr<StrAttr>:$linkName,
- OptionalAttr<fir_CUDADataAttributeAttr>:$cuda_attr
+ OptionalAttr<cuf_DataAttributeAttr>:$data_attr
);
let regions = (region AtMostRegion<1>:$region);
@@ -3077,7 +3018,7 @@ def fir_DeclareOp : fir_Op<"declare", [AttrSizedOperandSegments,
Optional<fir_DummyScopeType>:$dummy_scope,
Builtin_StringAttr:$uniq_name,
OptionalAttr<fir_FortranVariableFlagsAttr>:$fortran_attrs,
- OptionalAttr<fir_CUDADataAttributeAttr>:$cuda_attr
+ OptionalAttr<cuf_DataAttributeAttr>:$data_attr
);
let results = (outs AnyRefOrBox);
@@ -3130,125 +3071,6 @@ def fir_BoxOffsetOp : fir_Op<"box_offset", [NoMemoryEffect]> {
];
}
-def fir_CUDAKernelOp : fir_Op<"cuda_kernel", [AttrSizedOperandSegments,
- DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
-
- let description = [{
- Represent the CUDA Fortran kernel directive. The operation is a loop like
- operation that represents the iteration range of the embedded loop nest.
-
- When grid or block variadic operands are empty, a `*` only syntax was used
- in the Fortran code.
- If the `*` is mixed with values for either grid or block, these are
- represented by a 0 constant value.
- }];
-
- let arguments = (ins
- Variadic<I32>:$grid, // empty means `*`
- Variadic<I32>:$block, // empty means `*`
- Optional<I32>:$stream,
- Variadic<Index>:$lowerbound,
- Variadic<Index>:$upperbound,
- Variadic<Index>:$step,
- OptionalAttr<I64Attr>:$n
- );
-
- let regions = (region AnyRegion:$region);
-
- let assemblyFormat = [{
- `<` `<` `<` custom<CUFKernelValues>($grid, type($grid)) `,`
- custom<CUFKernelValues>($block, type($block))
- ( `,` `stream` `=` $stream^ )? `>` `>` `>`
- custom<CUFKernelLoopControl>($region, $lowerbound, type($lowerbound),
- $upperbound, type($upperbound), $step, type($step))
- attr-dict
- }];
-
- let hasVerifier = 1;
-}
-
-def fir_CUDADataTransferOp : fir_Op<"cuda_data_transfer", []> {
- let summary = "Represent a data transfer between host and device memory";
-
- let description = [{
- CUDA Fortran allows data transfer to be done via intrinsic assignment
- between a host and a device variable. This operation is used to materialized
- the data transfer between the lhs and rhs memory references.
- The kind of transfer is specified in the attribute.
-
- ```
- adev = a ! transfer host to device
- a = adev ! transfer device to host
- bdev = adev ! transfer device to device
- ```
- }];
-
- let arguments = (ins Arg<AnyReferenceLike, "", [MemWrite]>:$src,
- Arg<AnyReferenceLike, "", [MemRead]>:$dst,
- fir_CUDADataTransferKindAttr:$transfer_kind);
-
- let assemblyFormat = [{
- $src `to` $dst attr-dict `:` type(operands)
- }];
-}
-
-def fir_CUDAAllocateOp : fir_Op<"cuda_allocate", [AttrSizedOperandSegments,
- MemoryEffects<[MemAlloc<DefaultResource>]>]> {
- let summary = "Perform the device allocation of data of an allocatable";
-
- let description = [{
- The fir.cuda_allocate operation performs the allocation on the device
- of the data of an allocatable. The descriptor passed to the operation
- is initialized before with the standard flang runtime calls.
- }];
-
- let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
- Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
- Optional<AnyIntegerType>:$stream,
- Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
- Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
- fir_CUDADataAttributeAttr:$cuda_attr,
- UnitAttr:$hasStat);
-
- let results = (outs AnyIntegerType:$stat);
-
- let assemblyFormat = [{
- $box `:` qualified(type($box))
- ( `source` `(` $source^ `:` qualified(type($source) )`)` )?
- ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
- ( `stream` `(` $stream^ `:` type($stream) `)` )?
- ( `pinned` `(` $pinned^ `:` type($pinned) `)` )?
- attr-dict `->` type($stat)
- }];
-
- let hasVerifier = 1;
-}
-
-def fir_CUDADeallocateOp : fir_Op<"cuda_deallocate",
- [MemoryEffects<[MemFree<DefaultResource>]>]> {
- let summary = "Perform the device deallocation of data of an allocatable";
-
- let description = [{
- The fir.cuda_deallocate operation performs the deallocation on the device
- of the data of an allocatable.
- }];
-
- let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
- Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
- fir_CUDADataAttributeAttr:$cuda_attr,
- UnitAttr:$hasStat);
-
- let results = (outs AnyIntegerType:$stat);
-
- let assemblyFormat = [{
- $box `:` qualified(type($box))
- ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
- attr-dict `->` type($stat)
- }];
-
- let hasVerifier = 1;
-}
-
def fir_DummyScopeOp : fir_Op<"dummy_scope",
[MemoryEffects<[MemWrite<DebuggingResource>]>]> {
let summary = "Define a scope for dummy arguments";
@@ -3329,62 +3151,4 @@ def fir_DummyScopeOp : fir_Op<"dummy_scope",
let assemblyFormat = "attr-dict `:` type(results)";
}
-def fir_CUDAAllocOp : fir_Op<"cuda_alloc", [AttrSizedOperandSegments,
- MemoryEffects<[MemAlloc]>]> {
- let summary = "Allocate an object on device";
-
- let description = [{
- This is a drop in replacement for fir.alloca and fir.allocmem for device
- object. Any device, managed or unified object declared in an host
- subprogram needs to be allocated in the device memory through runtime calls.
- The fir.cuda_alloc is an abstraction to the runtime calls and works together
- with fir.cuda_free.
- }];
-
- let arguments = (ins
- TypeAttr:$in_type,
- OptionalAttr<StrAttr>:$uniq_name,
- OptionalAttr<StrAttr>:$bindc_name,
- Variadic<AnyIntegerType>:$typeparams,
- Variadic<AnyIntegerType>:$shape,
- fir_CUDADataAttributeAttr:$cuda_attr
- );
-
- let results = (outs fir_ReferenceType:$ptr);
-
- let assemblyFormat = [{
- $in_type (`(` $typeparams^ `:` type($typeparams) `)`)?
- (`,` $shape^ `:` type($shape) )? attr-dict `->` qualified(type($ptr))
- }];
-
- let builders = [
- OpBuilder<(ins "mlir::Type":$inType, "llvm::StringRef":$uniqName,
- "llvm::StringRef":$bindcName,
- "fir::CUDADataAttributeAttr":$cudaAttr,
- CArg<"mlir::ValueRange", "{}">:$typeparams,
- CArg<"mlir::ValueRange", "{}">:$shape,
- CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
-
- let hasVerifier = 1;
-}
-
-def fir_CUDAFreeOp : fir_Op<"cuda_free", [MemoryEffects<[MemFree]>]> {
- let summary = "Free a device allocated object";
-
- let description = [{
- The fir.cuda_free operation frees the memory allocated by fir.cuda_alloc.
- This is used for non-allocatable device, managed and unified device
- variables declare in host subprogram.
- }];
-
- let arguments = (ins
- Arg<AnyReferenceLike, "", [MemFree]>:$devptr,
- fir_CUDADataAttributeAttr:$cuda_attr
- );
-
- let assemblyFormat = "$devptr `:` qualified(type($devptr)) attr-dict";
-
- let hasVerifier = 1;
-}
-
#endif
diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
index 46b62d8de8d37..b68a39bf374bd 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
@@ -75,19 +75,6 @@ constexpr llvm::StringRef getOptionalAttrName() { return "fir.optional"; }
/// Attribute to mark Fortran entities with the TARGET attribute.
static constexpr llvm::StringRef getTargetAttrName() { return "fir.target"; }
-/// Attribute to mark Fortran entities with the CUDA attribute.
-static constexpr llvm::StringRef getCUDAAttrName() { return "fir.cuda_attr"; }
-
-/// Attribute to carry CUDA launch_bounds values.
-static constexpr llvm::StringRef getCUDALaunchBoundsAttrName() {
- return "fir.cuda_launch_bounds";
-}
-
-/// Attribute to carry CUDA cluster_dims values.
-static constexpr llvm::StringRef getCUDAClusterDimsAttrName() {
- return "fir.cuda_cluster_dims";
-}
-
/// Attribute to mark that a function argument is a character dummy procedure.
/// Character dummy procedure have special ABI constraints.
static constexpr llvm::StringRef getCharacterProcedureDummyAttrName() {
diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index 376417e3c3531..b537d9e11ef80 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -15,6 +15,7 @@
#define FORTRAN_DIALECT_HLFIR_OPS
include "flang/Optimizer/HLFIR/HLFIROpBase.td"
+include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
include "flang/Optimizer/Dialect/FIRTypes.td"
include "flang/Optimizer/Dialect/FIRAttr.td"
include "flang/Optimizer/Dialect/FortranVariableInterface.td"
@@ -90,7 +91,7 @@ def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments,
Optional<fir_DummyScopeType>:$dummy_scope,
Builtin_StringAttr:$uniq_name,
OptionalAttr<fir_FortranVariableFlagsAttr>:$fortran_attrs,
- OptionalAttr<fir_CUDADataAttributeAttr>:$cuda_attr
+ OptionalAttr<cuf_DataAttributeAttr>:$data_attr
);
let results = (outs AnyFortranVariable, AnyRefOrBoxLike);
@@ -106,7 +107,7 @@ def hlfir_DeclareOp : hlfir_Op<"declare", [AttrSizedOperandSegments,
CArg<"mlir::Value", "{}">:$shape, CArg<"mlir::ValueRange", "{}">:$typeparams,
CArg<"mlir::Value", "{}">:$dummy_scope,
CArg<"fir::FortranVariableFlagsAttr", "{}">:$fortran_attrs,
- CArg<"fir::CUDADataAttributeAttr", "{}">:$cuda_attr)>];
+ CArg<"cuf::DataAttributeAttr", "{}">:$data_attr)>];
let extraClassDeclaration = [{
/// Get the variable original base (same as input). It lacks
diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
index 9f4c4ed28a4ae..48cc1cbc64568 100644
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -13,6 +13,7 @@
#ifndef FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
#define FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "mlir/Conversion/Passes.h"
@@ -34,7 +35,7 @@ namespace fir::support {
mlir::scf::SCFDialect, mlir::arith::ArithDialect, \
mlir::cf::ControlFlowDialect, mlir::func::FuncDialect, \
mlir::vector::VectorDialect, mlir::math::MathDialect, \
- mlir::complex::ComplexDialect, mlir::DLTIDialect
+ mlir::complex::ComplexDialect, mlir::DLTIDialect, cuf::CUFDialect
#define FLANG_CODEGEN_DIALECT_LIST FIRCodeGenDialect, mlir::LLVM::LLVMDialect
diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 2da6f24da40e9..d8bcb5fae034d 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -16,6 +16,7 @@
#include "flang/Common/default-kinds.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Support/FatalError.h"
@@ -136,66 +137,6 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type,
" in " + intrinsicName);
}
-inline fir::CUDADataAttributeAttr
-getCUDADataAttribute(mlir::MLIRContext *mlirContext,
- std::optional<Fortran::common::CUDADataAttr> cudaAttr) {
- if (cudaAttr) {
- fir::CUDADataAttribute attr;
- switch (*cudaAttr) {
- case Fortran::common::CUDADataAttr::Constant:
- attr = fir::CUDADataAttribute::Constant;
- break;
- case Fortran::common::CUDADataAttr::Device:
- attr = fir::CUDADataAttribute::Device;
- break;
- case Fortran::common::CUDADataAttr::Managed:
- attr = fir::CUDADataAttribute::Managed;
- break;
- case Fortran::common::CUDADataAttr::Pinned:
- attr = fir::CUDADataAttribute::Pinned;
- break;
- case Fortran::common::CUDADataAttr::Shared:
- attr = fir::CUDADataAttribute::Shared;
- break;
- case Fortran::common::CUDADataAttr::Texture:
- // Obsolete attribute
- return {};
- case Fortran::common::CUDADataAttr::Unified:
- attr = fir::CUDADataAttribute::Unified;
- break;
- }
- return fir::CUDADataAttributeAttr::get(mlirContext, attr);
- }
- return {};
-}
-
-inline fir::CUDAProcAttributeAttr getCUDAProcAttribute(
- mlir::MLIRContext *mlirContext,
- std::optional<Fortran::common::CUDASubprogramAttrs> cudaAttr) {
- if (cudaAttr) {
- fir::CUDAProcAttribute attr;
- switch (*cudaAttr) {
- case Fortran::common::CUDASubprogramAttrs::Host:
- attr = fir::CUDAProcAttribute::Host;
- break;
- case Fortran::common::CUDASubprogramAttrs::Device:
- attr = fir::CUDAProcAttribute::Device;
- break;
- case Fortran::common::CUDASubprogramAttrs::HostDevice:
- attr = fir::CUDAProcAttribute::HostDevice;
- break;
- case Fortran::common::CUDASubprogramAttrs::Global:
- attr = fir::CUDAProcAttribute::Global;
- break;
- case Fortran::common::CUDASubprogramAttrs::Grid_Global:
- attr = fir::CUDAProcAttribute::GridGlobal;
- break;
- }
- return fir::CUDAProcAttributeAttr::get(mlirContext, attr);
- }
- return {};
-}
-
} // namespace fir
#endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H
diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt
index a701c264bc4c2..f85665d114299 100644
--- a/flang/lib/Frontend/CMakeLists.txt
+++ b/flang/lib/Frontend/CMakeLists.txt
@@ -14,6 +14,7 @@ add_flang_library(flangFrontend
TextDiagnostic.cpp
DEPENDS
+ CUFDialect
FIRDialect
FIROptCodeGenPassIncGen
FIROptTransformsPassIncGen
@@ -23,6 +24,7 @@ add_flang_library(flangFrontend
${extension_libs}
LINK_LIBS
+ CUFDialect
FortranParser
FortranSemantics
FortranEvaluate
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index a1957c0eb1bb7..61f4bbd856a8a 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -24,6 +24,7 @@
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/Support/FatalError.h"
@@ -729,9 +730,9 @@ class AllocateStmtHelper {
ErrorManager &errorManager,
const Fortran::semantics::Symbol &sym) {
Fortran::lower::StatementContext stmtCtx;
- fir::CUDADataAttributeAttr cudaAttr =
- Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
- sym);
+ cuf::DataAttributeAttr cudaAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
+ sym);
mlir::Value errmsg = errMsgExpr ? errorManager.errMsgAddr : nullptr;
mlir::Value stream =
streamExpr
@@ -746,7 +747,7 @@ class AllocateStmtHelper {
// Keep return type the same as a standard AllocatableAllocate call.
mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
return builder
- .create<fir::CUDAAllocateOp>(
+ .create<cuf::AllocateOp>(
loc, retTy, box.getAddr(), errmsg, stream, pinned, source, cudaAttr,
errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
.getResult();
@@ -804,9 +805,9 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
const fir::MutableBoxValue &box,
ErrorManager &errorManager,
const Fortran::semantics::Symbol &sym) {
- fir::CUDADataAttributeAttr cudaAttr =
- Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
- sym);
+ cuf::DataAttributeAttr cudaAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
+ sym);
mlir::Value errmsg =
mlir::isa<fir::AbsentOp>(errorManager.errMsgAddr.getDefiningOp())
? nullptr
@@ -815,7 +816,7 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
// Keep return type the same as a standard AllocatableAllocate call.
mlir::Type retTy = fir::runtime::getModel<int>()(builder.getContext());
return builder
- .create<fir::CUDADeallocateOp>(
+ .create<cuf::DeallocateOp>(
loc, retTy, box.getAddr(), errmsg, cudaAttr,
errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr)
.getResult();
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 596049fcfc924..b5179fdee62a1 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -40,6 +40,8 @@
#include "flang/Optimizer/Builder/Runtime/Ragged.h"
#include "flang/Optimizer/Builder/Runtime/Stop.h"
#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
@@ -2642,8 +2644,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
loopEval = &*std::next(loopEval->getNestedEvaluations().begin());
}
- auto op = builder->create<fir::CUDAKernelOp>(
- loc, gridValues, blockValues, streamValue, lbs, ubs, steps, n);
+ auto op = builder->create<cuf::KernelOp>(loc, gridValues, blockValues,
+ streamValue, lbs, ubs, steps, n);
builder->createBlock(&op.getRegion(), op.getRegion().end(), ivTypes,
ivLocs);
mlir::Block &b = op.getRegion().back();
@@ -3713,46 +3715,44 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// device = host
if (lhsIsDevice && !rhsIsDevice) {
- auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
- builder.getContext(), fir::CUDADataTransferKind::HostDevice);
+ auto transferKindAttr = cuf::DataTransferKindAttr::get(
+ builder.getContext(), cuf::DataTransferKind::HostDevice);
if (!rhs.isVariable()) {
auto associate = hlfir::genAssociateExpr(
loc, builder, rhs, rhs.getType(), ".cuf_host_tmp");
- builder.create<fir::CUDADataTransferOp>(loc, associate.getBase(), lhs,
- transferKindAttr);
+ builder.create<cuf::DataTransferOp>(loc, associate.getBase(), lhs,
+ transferKindAttr);
builder.create<hlfir::EndAssociateOp>(loc, associate);
} else {
- builder.create<fir::CUDADataTransferOp>(loc, rhs, lhs,
- transferKindAttr);
+ builder.create<cuf::DataTransferOp>(loc, rhs, lhs, transferKindAttr);
}
return;
}
// host = device
if (!lhsIsDevice && rhsIsDevice) {
- auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
- builder.getContext(), fir::CUDADataTransferKind::DeviceHost);
+ auto transferKindAttr = cuf::DataTransferKindAttr::get(
+ builder.getContext(), cuf::DataTransferKind::DeviceHost);
if (!rhs.isVariable()) {
// evaluateRhs loads scalar. Look for the memory reference to be used in
// the transfer.
if (mlir::isa_and_nonnull<fir::LoadOp>(rhs.getDefiningOp())) {
auto loadOp = mlir::dyn_cast<fir::LoadOp>(rhs.getDefiningOp());
- builder.create<fir::CUDADataTransferOp>(loc, loadOp.getMemref(), lhs,
- transferKindAttr);
+ builder.create<cuf::DataTransferOp>(loc, loadOp.getMemref(), lhs,
+ transferKindAttr);
return;
}
} else {
- builder.create<fir::CUDADataTransferOp>(loc, rhs, lhs,
- transferKindAttr);
+ builder.create<cuf::DataTransferOp>(loc, rhs, lhs, transferKindAttr);
}
return;
}
if (lhsIsDevice && rhsIsDevice) {
assert(rhs.isVariable() && "CUDA Fortran assignment rhs is not legal");
- auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
- builder.getContext(), fir::CUDADataTransferKind::DeviceDevice);
- builder.create<fir::CUDADataTransferOp>(loc, rhs, lhs, transferKindAttr);
+ auto transferKindAttr = cuf::DataTransferKindAttr::get(
+ builder.getContext(), cuf::DataTransferKind::DeviceDevice);
+ builder.create<cuf::DataTransferOp>(loc, rhs, lhs, transferKindAttr);
return;
}
llvm_unreachable("Unhandled CUDA data transfer");
@@ -3763,8 +3763,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
const Fortran::evaluate::Assignment &assign) {
llvm::SmallVector<mlir::Value> temps;
localSymbols.pushScope();
- auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
- builder.getContext(), fir::CUDADataTransferKind::DeviceHost);
+ auto transferKindAttr = cuf::DataTransferKindAttr::get(
+ builder.getContext(), cuf::DataTransferKind::DeviceHost);
[[maybe_unused]] unsigned nbDeviceResidentObject = 0;
for (const Fortran::semantics::Symbol &sym :
Fortran::evaluate::CollectSymbols(assign.rhs)) {
@@ -3789,8 +3789,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
addSymbol(sym,
hlfir::translateToExtendedValue(loc, builder, temp).first,
/*forced=*/true);
- builder.create<fir::CUDADataTransferOp>(loc, addr, temp,
- transferKindAttr);
+ builder.create<cuf::DataTransferOp>(loc, addr, temp,
+ transferKindAttr);
++nbDeviceResidentObject;
}
}
@@ -3802,15 +3802,15 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// subprogram are not considered fully device context so it will return false
// for it.
static bool isDeviceContext(fir::FirOpBuilder &builder) {
- if (builder.getRegion().getParentOfType<fir::CUDAKernelOp>())
+ if (builder.getRegion().getParentOfType<cuf::KernelOp>())
return true;
if (auto funcOp =
builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
if (auto cudaProcAttr =
- funcOp.getOperation()->getAttrOfType<fir::CUDAProcAttributeAttr>(
- fir::getCUDAAttrName())) {
- return cudaProcAttr.getValue() != fir::CUDAProcAttribute::Host &&
- cudaProcAttr.getValue() != fir::CUDAProcAttribute::HostDevice;
+ funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(
+ cuf::getProcAttrName())) {
+ return cudaProcAttr.getValue() != cuf::ProcAttribute::Host &&
+ cudaProcAttr.getValue() != cuf::ProcAttribute::HostDevice;
}
}
return false;
diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt
index 1546409752e78..ba6622d8504a4 100644
--- a/flang/lib/Lower/CMakeLists.txt
+++ b/flang/lib/Lower/CMakeLists.txt
@@ -37,6 +37,8 @@ add_flang_library(FortranLower
VectorSubscripts.cpp
DEPENDS
+ CUFAttrs
+ CUFDialect
FIRDialect
FIRTransforms
HLFIRDialect
@@ -44,6 +46,8 @@ add_flang_library(FortranLower
${extension_libs}
LINK_LIBS
+ CUFAttrs
+ CUFDialect
FIRDialect
FIRDialectSupport
FIRBuilder
diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp
index c1f54ad392879..cfbb7c7f6b4f4 100644
--- a/flang/lib/Lower/CallInterface.cpp
+++ b/flang/lib/Lower/CallInterface.cpp
@@ -627,9 +627,9 @@ setCUDAAttributes(mlir::func::FuncOp func,
characteristic) {
if (characteristic && characteristic->cudaSubprogramAttrs) {
func.getOperation()->setAttr(
- fir::getCUDAAttrName(),
- fir::getCUDAProcAttribute(func.getContext(),
- *characteristic->cudaSubprogramAttrs));
+ cuf::getProcAttrName(),
+ cuf::getProcAttribute(func.getContext(),
+ *characteristic->cudaSubprogramAttrs));
}
if (sym) {
@@ -649,9 +649,9 @@ setCUDAAttributes(mlir::func::FuncOp func,
ubAttr =
mlir::IntegerAttr::get(i64Ty, details->cudaLaunchBounds()[2]);
func.getOperation()->setAttr(
- fir::getCUDALaunchBoundsAttrName(),
- fir::CUDALaunchBoundsAttr::get(func.getContext(), maxTPBAttr,
- minBPMAttr, ubAttr));
+ cuf::getLaunchBoundsAttrName(),
+ cuf::LaunchBoundsAttr::get(func.getContext(), maxTPBAttr,
+ minBPMAttr, ubAttr));
}
if (!details->cudaClusterDims().empty()) {
@@ -663,9 +663,8 @@ setCUDAAttributes(mlir::func::FuncOp func,
auto zAttr =
mlir::IntegerAttr::get(i64Ty, details->cudaClusterDims()[2]);
func.getOperation()->setAttr(
- fir::getCUDAClusterDimsAttrName(),
- fir::CUDAClusterDimsAttr::get(func.getContext(), xAttr, yAttr,
- zAttr));
+ cuf::getClusterDimsAttrName(),
+ cuf::ClusterDimsAttr::get(func.getContext(), xAttr, yAttr, zAttr));
}
}
}
@@ -1116,8 +1115,8 @@ class Fortran::lower::CallInterfaceImpl {
addMLIRAttr(fir::getTargetAttrName());
if (obj.cudaDataAttr)
attrs.emplace_back(
- mlir::StringAttr::get(&mlirContext, fir::getCUDAAttrName()),
- fir::getCUDADataAttribute(&mlirContext, obj.cudaDataAttr));
+ mlir::StringAttr::get(&mlirContext, cuf::getDataAttrName()),
+ cuf::getDataAttribute(&mlirContext, obj.cudaDataAttr));
// TODO: intents that require special care (e.g finalization)
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index c6bfe35921699..7ec719a2cb9ec 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -28,6 +28,7 @@
#include "flang/Optimizer/Builder/MutableBox.h"
#include "flang/Optimizer/Builder/Runtime/Derived.h"
#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "mlir/IR/IRMapping.h"
@@ -589,7 +590,7 @@ std::pair<fir::ExtendedValue, bool> Fortran::lower::genCallOpAndResult(
fir::getBase(converter.genExprValue(
caller.getCallDescription().chevrons()[3], stmtCtx)));
- builder.create<fir::CUDAKernelLaunch>(
+ builder.create<cuf::KernelLaunchOp>(
loc, funcType.getResults(), funcSymbolAttr, grid_x, grid_y, grid_z,
block_x, block_y, block_z, bytes, stream, operands);
callNumResults = 0;
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 5ddd8a6a9d41f..b8868161fa050 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -30,6 +30,7 @@
#include "flang/Optimizer/Builder/IntrinsicCall.h"
#include "flang/Optimizer/Builder/Runtime/Derived.h"
#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
@@ -139,7 +140,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter,
const Fortran::lower::pft::Variable &var,
llvm::StringRef globalName,
mlir::StringAttr linkage,
- fir::CUDADataAttributeAttr cudaAttr = {});
+ cuf::DataAttributeAttr dataAttr = {});
static mlir::Location genLocation(Fortran::lower::AbstractConverter &converter,
const Fortran::semantics::Symbol &sym) {
@@ -172,12 +173,12 @@ static fir::GlobalOp declareGlobal(Fortran::lower::AbstractConverter &converter,
!Fortran::semantics::IsProcedurePointer(ultimate))
mlir::emitError(loc, "processing global declaration: symbol '")
<< toStringRef(sym.name()) << "' has unexpected details\n";
- fir::CUDADataAttributeAttr cudaAttr =
- Fortran::lower::translateSymbolCUDADataAttribute(
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(
converter.getFirOpBuilder().getContext(), sym);
return builder.createGlobal(loc, converter.genType(var), globalName, linkage,
mlir::Attribute{}, isConstant(ultimate),
- var.isTarget(), cudaAttr);
+ var.isTarget(), dataAttr);
}
/// Temporary helper to catch todos in initial data target lowering.
@@ -474,7 +475,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter,
const Fortran::lower::pft::Variable &var,
llvm::StringRef globalName,
mlir::StringAttr linkage,
- fir::CUDADataAttributeAttr cudaAttr) {
+ cuf::DataAttributeAttr dataAttr) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
const Fortran::semantics::Symbol &sym = var.getSymbol();
mlir::Location loc = genLocation(converter, sym);
@@ -514,7 +515,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter,
if (!global)
global =
builder.createGlobal(loc, symTy, globalName, linkage, mlir::Attribute{},
- isConst, var.isTarget(), cudaAttr);
+ isConst, var.isTarget(), dataAttr);
if (Fortran::semantics::IsAllocatableOrPointer(sym) &&
!Fortran::semantics::IsProcedure(sym)) {
const auto *details =
@@ -694,9 +695,9 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
return builder.create<fir::ZeroOp>(loc, fir::ReferenceType::get(ty));
if (Fortran::semantics::NeedCUDAAlloc(ultimateSymbol)) {
- fir::CUDADataAttributeAttr cudaAttr =
- Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
- ultimateSymbol);
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
+ ultimateSymbol);
llvm::SmallVector<mlir::Value> indices;
llvm::SmallVector<mlir::Value> elidedShape =
fir::factory::elideExtentsAlreadyInType(ty, shape);
@@ -705,8 +706,8 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
auto idxTy = builder.getIndexType();
for (mlir::Value sh : elidedShape)
indices.push_back(builder.createConvert(loc, idxTy, sh));
- return builder.create<fir::CUDAAllocOp>(loc, ty, nm, symNm, cudaAttr,
- lenParams, indices);
+ return builder.create<cuf::AllocOp>(loc, ty, nm, symNm, dataAttr, lenParams,
+ indices);
}
// Let the builder do all the heavy lifting.
@@ -950,10 +951,10 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
converter.getSymbolExtendedValue(var.getSymbol(), &symMap);
auto *sym = &var.getSymbol();
converter.getFctCtx().attachCleanup([builder, loc, exv, sym]() {
- fir::CUDADataAttributeAttr cudaAttr =
- Fortran::lower::translateSymbolCUDADataAttribute(
- builder->getContext(), *sym);
- builder->create<fir::CUDAFreeOp>(loc, fir::getBase(exv), cudaAttr);
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(builder->getContext(),
+ *sym);
+ builder->create<cuf::FreeOp>(loc, fir::getBase(exv), dataAttr);
});
}
}
@@ -1628,11 +1629,11 @@ fir::FortranVariableFlagsAttr Fortran::lower::translateSymbolAttributes(
return fir::FortranVariableFlagsAttr::get(mlirContext, flags);
}
-fir::CUDADataAttributeAttr Fortran::lower::translateSymbolCUDADataAttribute(
+cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute(
mlir::MLIRContext *mlirContext, const Fortran::semantics::Symbol &sym) {
std::optional<Fortran::common::CUDADataAttr> cudaAttr =
Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate());
- return fir::getCUDADataAttribute(mlirContext, cudaAttr);
+ return cuf::getDataAttribute(mlirContext, cudaAttr);
}
/// Map a symbol to its FIR address and evaluated specification expressions.
@@ -1672,9 +1673,9 @@ static void genDeclareSymbol(Fortran::lower::AbstractConverter &converter,
auto name = converter.mangleName(sym);
fir::FortranVariableFlagsAttr attributes =
Fortran::lower::translateSymbolAttributes(builder.getContext(), sym);
- fir::CUDADataAttributeAttr cudaAttr =
- Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
- sym);
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
+ sym);
if (sym.test(Fortran::semantics::Symbol::Flag::CrayPointee)) {
mlir::Type ptrBoxType =
@@ -1716,7 +1717,7 @@ static void genDeclareSymbol(Fortran::lower::AbstractConverter &converter,
dummyScope = converter.dummyArgsScopeValue();
auto newBase = builder.create<hlfir::DeclareOp>(
loc, base, name, shapeOrShift, lenParams, dummyScope, attributes,
- cudaAttr);
+ dataAttr);
symMap.addVariableDefinition(sym, newBase, force);
return;
}
@@ -1762,15 +1763,15 @@ void Fortran::lower::genDeclareSymbol(
fir::FortranVariableFlagsAttr attributes =
Fortran::lower::translateSymbolAttributes(
builder.getContext(), sym.GetUltimate(), extraFlags);
- fir::CUDADataAttributeAttr cudaAttr =
- Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(),
- sym.GetUltimate());
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
+ sym.GetUltimate());
auto name = converter.mangleName(sym);
mlir::Value dummyScope;
if (converter.isRegisteredDummySymbol(sym))
dummyScope = converter.dummyArgsScopeValue();
hlfir::EntityWithAttributes declare = hlfir::genDeclare(
- loc, builder, exv, name, attributes, dummyScope, cudaAttr);
+ loc, builder, exv, name, attributes, dummyScope, dataAttr);
symMap.addVariableDefinition(sym, declare.getIfVariableInterface(), force);
return;
}
@@ -2272,10 +2273,10 @@ void Fortran::lower::defineModuleVariable(
// Do nothing. Mapping will be done on user side.
} else {
std::string globalName = converter.mangleName(sym);
- fir::CUDADataAttributeAttr cudaAttr =
- Fortran::lower::translateSymbolCUDADataAttribute(
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(
converter.getFirOpBuilder().getContext(), sym);
- defineGlobal(converter, var, globalName, linkage, cudaAttr);
+ defineGlobal(converter, var, globalName, linkage, dataAttr);
}
}
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index bd018d7f015b8..3c3fd02d7c88e 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -322,18 +322,18 @@ mlir::Value fir::FirOpBuilder::createHeapTemporary(
fir::GlobalOp fir::FirOpBuilder::createGlobal(
mlir::Location loc, mlir::Type type, llvm::StringRef name,
mlir::StringAttr linkage, mlir::Attribute value, bool isConst,
- bool isTarget, fir::CUDADataAttributeAttr cudaAttr) {
+ bool isTarget, cuf::DataAttributeAttr dataAttr) {
if (auto global = getNamedGlobal(name))
return global;
auto module = getModule();
auto insertPt = saveInsertionPoint();
setInsertionPoint(module.getBody(), module.getBody()->end());
llvm::SmallVector<mlir::NamedAttribute> attrs;
- if (cudaAttr) {
+ if (dataAttr) {
auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(),
module.getContext());
attrs.push_back(mlir::NamedAttribute(
- fir::GlobalOp::getCudaAttrAttrName(globalOpName), cudaAttr));
+ fir::GlobalOp::getDataAttrAttrName(globalOpName), dataAttr));
}
auto glob = create<fir::GlobalOp>(loc, name, isConst, isTarget, type, value,
linkage, attrs);
@@ -346,7 +346,7 @@ fir::GlobalOp fir::FirOpBuilder::createGlobal(
fir::GlobalOp fir::FirOpBuilder::createGlobal(
mlir::Location loc, mlir::Type type, llvm::StringRef name, bool isConst,
bool isTarget, std::function<void(FirOpBuilder &)> bodyBuilder,
- mlir::StringAttr linkage, fir::CUDADataAttributeAttr cudaAttr) {
+ mlir::StringAttr linkage, cuf::DataAttributeAttr dataAttr) {
if (auto global = getNamedGlobal(name))
return global;
auto module = getModule();
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 8fdab2a571812..511585dc76894 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -199,7 +199,7 @@ fir::FortranVariableOpInterface
hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
const fir::ExtendedValue &exv, llvm::StringRef name,
fir::FortranVariableFlagsAttr flags, mlir::Value dummyScope,
- fir::CUDADataAttributeAttr cudaAttr) {
+ cuf::DataAttributeAttr dataAttr) {
mlir::Value base = fir::getBase(exv);
assert(fir::conformsWithPassByRef(base.getType()) &&
@@ -229,7 +229,7 @@ hlfir::genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
},
[](const auto &) {});
auto declareOp = builder.create<hlfir::DeclareOp>(
- loc, base, name, shapeOrShift, lenParams, dummyScope, flags, cudaAttr);
+ loc, base, name, shapeOrShift, lenParams, dummyScope, flags, dataAttr);
return mlir::cast<fir::FortranVariableOpInterface>(declareOp.getOperation());
}
diff --git a/flang/lib/Optimizer/Dialect/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CMakeLists.txt
index 745439b7e1e5e..a8235f841b879 100644
--- a/flang/lib/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/lib/Optimizer/Dialect/CMakeLists.txt
@@ -1,4 +1,5 @@
add_subdirectory(Support)
+add_subdirectory(CUF)
add_flang_library(FIRDialect
FIRAttr.cpp
@@ -13,9 +14,11 @@ add_flang_library(FIRDialect
CanonicalizationPatternsIncGen
MLIRIR
FIROpsIncGen
+ CUFAttrsIncGen
intrinsics_gen
LINK_LIBS
+ CUFAttrs
FIRDialectSupport
MLIRArithDialect
MLIRBuiltinToLLVMIRTranslation
diff --git a/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
new file mode 100644
index 0000000000000..d5ce5e0a7614d
--- /dev/null
+++ b/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
@@ -0,0 +1,22 @@
+add_subdirectory(Attributes)
+
+add_flang_library(CUFDialect
+ CUFDialect.cpp
+ CUFOps.cpp
+
+ DEPENDS
+ MLIRIR
+ CUFOpsIncGen
+
+ LINK_LIBS
+ CUFAttrs
+ FIRDialect
+ FIRDialectSupport
+ MLIRIR
+ MLIRTargetLLVMIRExport
+
+ LINK_COMPONENTS
+ AsmParser
+ AsmPrinter
+ Remarks
+)
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFDialect.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFDialect.cpp
new file mode 100644
index 0000000000000..47d3636df4f9f
--- /dev/null
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFDialect.cpp
@@ -0,0 +1,25 @@
+//===-- CUFDialect.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.cpp.inc"
+
+void cuf::CUFDialect::initialize() {
+ registerAttributes();
+ addOperations<
+#define GET_OP_LIST
+#include "flang/Optimizer/Dialect/CUF/CUFOps.cpp.inc"
+ >();
+}
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
new file mode 100644
index 0000000000000..870652c72fab7
--- /dev/null
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -0,0 +1,219 @@
+//===-- CUFOps.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Diagnostics.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "llvm/ADT/SmallVector.h"
+
+//===----------------------------------------------------------------------===//
+// AllocOp
+//===----------------------------------------------------------------------===//
+
+static mlir::Type wrapAllocaResultType(mlir::Type intype) {
+ if (mlir::isa<fir::ReferenceType>(intype))
+ return {};
+ return fir::ReferenceType::get(intype);
+}
+
+void cuf::AllocOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
+ mlir::Type inType, llvm::StringRef uniqName,
+ llvm::StringRef bindcName,
+ cuf::DataAttributeAttr cudaAttr,
+ mlir::ValueRange typeparams, mlir::ValueRange shape,
+ llvm::ArrayRef<mlir::NamedAttribute> attributes) {
+ mlir::StringAttr nameAttr =
+ uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName);
+ mlir::StringAttr bindcAttr =
+ bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName);
+ build(builder, result, wrapAllocaResultType(inType),
+ mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape,
+ cudaAttr);
+ result.addAttributes(attributes);
+}
+
+template <typename Op>
+static mlir::LogicalResult checkCudaAttr(Op op) {
+ if (op.getDataAttr() == cuf::DataAttribute::Device ||
+ op.getDataAttr() == cuf::DataAttribute::Managed ||
+ op.getDataAttr() == cuf::DataAttribute::Unified)
+ return mlir::success();
+ return op.emitOpError("expect device, managed or unified cuda attribute");
+}
+
+mlir::LogicalResult cuf::AllocOp::verify() { return checkCudaAttr(*this); }
+
+//===----------------------------------------------------------------------===//
+// FreeOp
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult cuf::FreeOp::verify() { return checkCudaAttr(*this); }
+
+//===----------------------------------------------------------------------===//
+// AllocateOp
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult cuf::AllocateOp::verify() {
+ if (getPinned() && getStream())
+ return emitOpError("pinned and stream cannot appears at the same time");
+ if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
+ return emitOpError(
+ "expect box to be a reference to a class or box type value");
+ if (getSource() &&
+ !mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getSource().getType())))
+ return emitOpError(
+ "expect source to be a reference to/or a class or box type value");
+ if (getErrmsg() &&
+ !mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
+ return emitOpError(
+ "expect errmsg to be a reference to/or a box type value");
+ if (getErrmsg() && !getHasStat())
+ return emitOpError("expect stat attribute when errmsg is provided");
+ return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// DeallocateOp
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult cuf::DeallocateOp::verify() {
+ if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
+ return emitOpError(
+ "expect box to be a reference to class or box type value");
+ if (getErrmsg() &&
+ !mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
+ return emitOpError(
+ "expect errmsg to be a reference to/or a box type value");
+ if (getErrmsg() && !getHasStat())
+ return emitOpError("expect stat attribute when errmsg is provided");
+ return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// KernelOp
+//===----------------------------------------------------------------------===//
+
+llvm::SmallVector<mlir::Region *> cuf::KernelOp::getLoopRegions() {
+ return {&getRegion()};
+}
+
+mlir::ParseResult parseCUFKernelValues(
+ mlir::OpAsmParser &parser,
+ llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &values,
+ llvm::SmallVectorImpl<mlir::Type> &types) {
+ if (mlir::succeeded(parser.parseOptionalStar()))
+ return mlir::success();
+
+ if (mlir::succeeded(parser.parseOptionalLParen())) {
+ if (mlir::failed(parser.parseCommaSeparatedList(
+ mlir::AsmParser::Delimiter::None, [&]() {
+ if (parser.parseOperand(values.emplace_back()))
+ return mlir::failure();
+ return mlir::success();
+ })))
+ return mlir::failure();
+ auto builder = parser.getBuilder();
+ for (size_t i = 0; i < values.size(); i++) {
+ types.emplace_back(builder.getI32Type());
+ }
+ if (parser.parseRParen())
+ return mlir::failure();
+ } else {
+ if (parser.parseOperand(values.emplace_back()))
+ return mlir::failure();
+ auto builder = parser.getBuilder();
+ types.emplace_back(builder.getI32Type());
+ return mlir::success();
+ }
+ return mlir::success();
+}
+
+void printCUFKernelValues(mlir::OpAsmPrinter &p, mlir::Operation *op,
+ mlir::ValueRange values, mlir::TypeRange types) {
+ if (values.empty())
+ p << "*";
+
+ if (values.size() > 1)
+ p << "(";
+ llvm::interleaveComma(values, p, [&p](mlir::Value v) { p << v; });
+ if (values.size() > 1)
+ p << ")";
+}
+
+mlir::ParseResult parseCUFKernelLoopControl(
+ mlir::OpAsmParser &parser, mlir::Region ®ion,
+ llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &lowerbound,
+ llvm::SmallVectorImpl<mlir::Type> &lowerboundType,
+ llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &upperbound,
+ llvm::SmallVectorImpl<mlir::Type> &upperboundType,
+ llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &step,
+ llvm::SmallVectorImpl<mlir::Type> &stepType) {
+
+ llvm::SmallVector<mlir::OpAsmParser::Argument> inductionVars;
+ if (parser.parseLParen() ||
+ parser.parseArgumentList(inductionVars,
+ mlir::OpAsmParser::Delimiter::None,
+ /*allowType=*/true) ||
+ parser.parseRParen() || parser.parseEqual() || parser.parseLParen() ||
+ parser.parseOperandList(lowerbound, inductionVars.size(),
+ mlir::OpAsmParser::Delimiter::None) ||
+ parser.parseColonTypeList(lowerboundType) || parser.parseRParen() ||
+ parser.parseKeyword("to") || parser.parseLParen() ||
+ parser.parseOperandList(upperbound, inductionVars.size(),
+ mlir::OpAsmParser::Delimiter::None) ||
+ parser.parseColonTypeList(upperboundType) || parser.parseRParen() ||
+ parser.parseKeyword("step") || parser.parseLParen() ||
+ parser.parseOperandList(step, inductionVars.size(),
+ mlir::OpAsmParser::Delimiter::None) ||
+ parser.parseColonTypeList(stepType) || parser.parseRParen())
+ return mlir::failure();
+ return parser.parseRegion(region, inductionVars);
+}
+
+void printCUFKernelLoopControl(
+ mlir::OpAsmPrinter &p, mlir::Operation *op, mlir::Region ®ion,
+ mlir::ValueRange lowerbound, mlir::TypeRange lowerboundType,
+ mlir::ValueRange upperbound, mlir::TypeRange upperboundType,
+ mlir::ValueRange steps, mlir::TypeRange stepType) {
+ mlir::ValueRange regionArgs = region.front().getArguments();
+ if (!regionArgs.empty()) {
+ p << "(";
+ llvm::interleaveComma(
+ regionArgs, p, [&p](mlir::Value v) { p << v << " : " << v.getType(); });
+ p << ") = (" << lowerbound << " : " << lowerboundType << ") to ("
+ << upperbound << " : " << upperboundType << ") "
+ << " step (" << steps << " : " << stepType << ") ";
+ }
+ p.printRegion(region, /*printEntryBlockArgs=*/false);
+}
+
+mlir::LogicalResult cuf::KernelOp::verify() {
+ if (getLowerbound().size() != getUpperbound().size() ||
+ getLowerbound().size() != getStep().size())
+ return emitOpError(
+ "expect same number of values in lowerbound, upperbound and step");
+
+ return mlir::success();
+}
+
+// Tablegen operators
+
+#define GET_OP_CLASSES
+#include "flang/Optimizer/Dialect/CUF/CUFOps.cpp.inc"
diff --git a/flang/lib/Optimizer/Dialect/FIRAttr.cpp b/flang/lib/Optimizer/Dialect/FIRAttr.cpp
index 9ea3a0568f691..2faba63dfba07 100644
--- a/flang/lib/Optimizer/Dialect/FIRAttr.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRAttr.cpp
@@ -298,7 +298,5 @@ void fir::printFirAttribute(FIROpsDialect *dialect, mlir::Attribute attr,
void FIROpsDialect::registerAttributes() {
addAttributes<ClosedIntervalAttr, ExactTypeAttr, FortranVariableFlagsAttr,
LowerBoundAttr, PointIntervalAttr, RealAttr, SubclassAttr,
- UpperBoundAttr, CUDADataAttributeAttr, CUDAProcAttributeAttr,
- CUDALaunchBoundsAttr, CUDAClusterDimsAttr,
- CUDADataTransferKindAttr>();
+ UpperBoundAttr>();
}
diff --git a/flang/lib/Optimizer/Dialect/FIRDialect.cpp b/flang/lib/Optimizer/Dialect/FIRDialect.cpp
index 4d1e8cd1405af..4b1dadaac6728 100644
--- a/flang/lib/Optimizer/Dialect/FIRDialect.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRDialect.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index edf7f7f4b1a96..94113da9a46cf 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -3898,169 +3898,6 @@ mlir::LogicalResult fir::DeclareOp::verify() {
return fortranVar.verifyDeclareLikeOpImpl(getMemref());
}
-llvm::SmallVector<mlir::Region *> fir::CUDAKernelOp::getLoopRegions() {
- return {&getRegion()};
-}
-
-mlir::ParseResult parseCUFKernelValues(
- mlir::OpAsmParser &parser,
- llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &values,
- llvm::SmallVectorImpl<mlir::Type> &types) {
- if (mlir::succeeded(parser.parseOptionalStar()))
- return mlir::success();
-
- if (mlir::succeeded(parser.parseOptionalLParen())) {
- if (mlir::failed(parser.parseCommaSeparatedList(
- mlir::AsmParser::Delimiter::None, [&]() {
- if (parser.parseOperand(values.emplace_back()))
- return mlir::failure();
- return mlir::success();
- })))
- return mlir::failure();
- auto builder = parser.getBuilder();
- for (size_t i = 0; i < values.size(); i++) {
- types.emplace_back(builder.getI32Type());
- }
- if (parser.parseRParen())
- return mlir::failure();
- } else {
- if (parser.parseOperand(values.emplace_back()))
- return mlir::failure();
- auto builder = parser.getBuilder();
- types.emplace_back(builder.getI32Type());
- return mlir::success();
- }
- return mlir::success();
-}
-
-void printCUFKernelValues(mlir::OpAsmPrinter &p, mlir::Operation *op,
- mlir::ValueRange values, mlir::TypeRange types) {
- if (values.empty())
- p << "*";
-
- if (values.size() > 1)
- p << "(";
- llvm::interleaveComma(values, p, [&p](mlir::Value v) { p << v; });
- if (values.size() > 1)
- p << ")";
-}
-
-mlir::ParseResult parseCUFKernelLoopControl(
- mlir::OpAsmParser &parser, mlir::Region ®ion,
- llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &lowerbound,
- llvm::SmallVectorImpl<mlir::Type> &lowerboundType,
- llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &upperbound,
- llvm::SmallVectorImpl<mlir::Type> &upperboundType,
- llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &step,
- llvm::SmallVectorImpl<mlir::Type> &stepType) {
-
- llvm::SmallVector<mlir::OpAsmParser::Argument> inductionVars;
- if (parser.parseLParen() ||
- parser.parseArgumentList(inductionVars,
- mlir::OpAsmParser::Delimiter::None,
- /*allowType=*/true) ||
- parser.parseRParen() || parser.parseEqual() || parser.parseLParen() ||
- parser.parseOperandList(lowerbound, inductionVars.size(),
- mlir::OpAsmParser::Delimiter::None) ||
- parser.parseColonTypeList(lowerboundType) || parser.parseRParen() ||
- parser.parseKeyword("to") || parser.parseLParen() ||
- parser.parseOperandList(upperbound, inductionVars.size(),
- mlir::OpAsmParser::Delimiter::None) ||
- parser.parseColonTypeList(upperboundType) || parser.parseRParen() ||
- parser.parseKeyword("step") || parser.parseLParen() ||
- parser.parseOperandList(step, inductionVars.size(),
- mlir::OpAsmParser::Delimiter::None) ||
- parser.parseColonTypeList(stepType) || parser.parseRParen())
- return mlir::failure();
- return parser.parseRegion(region, inductionVars);
-}
-
-void printCUFKernelLoopControl(
- mlir::OpAsmPrinter &p, mlir::Operation *op, mlir::Region ®ion,
- mlir::ValueRange lowerbound, mlir::TypeRange lowerboundType,
- mlir::ValueRange upperbound, mlir::TypeRange upperboundType,
- mlir::ValueRange steps, mlir::TypeRange stepType) {
- mlir::ValueRange regionArgs = region.front().getArguments();
- if (!regionArgs.empty()) {
- p << "(";
- llvm::interleaveComma(
- regionArgs, p, [&p](mlir::Value v) { p << v << " : " << v.getType(); });
- p << ") = (" << lowerbound << " : " << lowerboundType << ") to ("
- << upperbound << " : " << upperboundType << ") "
- << " step (" << steps << " : " << stepType << ") ";
- }
- p.printRegion(region, /*printEntryBlockArgs=*/false);
-}
-
-mlir::LogicalResult fir::CUDAKernelOp::verify() {
- if (getLowerbound().size() != getUpperbound().size() ||
- getLowerbound().size() != getStep().size())
- return emitOpError(
- "expect same number of values in lowerbound, upperbound and step");
-
- return mlir::success();
-}
-
-mlir::LogicalResult fir::CUDAAllocateOp::verify() {
- if (getPinned() && getStream())
- return emitOpError("pinned and stream cannot appears at the same time");
- if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
- return emitOpError(
- "expect box to be a reference to a class or box type value");
- if (getSource() &&
- !mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getSource().getType())))
- return emitOpError(
- "expect source to be a reference to/or a class or box type value");
- if (getErrmsg() &&
- !mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
- return emitOpError(
- "expect errmsg to be a reference to/or a box type value");
- if (getErrmsg() && !getHasStat())
- return emitOpError("expect stat attribute when errmsg is provided");
- return mlir::success();
-}
-
-mlir::LogicalResult fir::CUDADeallocateOp::verify() {
- if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType())))
- return emitOpError(
- "expect box to be a reference to class or box type value");
- if (getErrmsg() &&
- !mlir::isa<fir::BoxType>(fir::unwrapRefType(getErrmsg().getType())))
- return emitOpError(
- "expect errmsg to be a reference to/or a box type value");
- if (getErrmsg() && !getHasStat())
- return emitOpError("expect stat attribute when errmsg is provided");
- return mlir::success();
-}
-
-void fir::CUDAAllocOp::build(
- mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Type inType,
- llvm::StringRef uniqName, llvm::StringRef bindcName,
- fir::CUDADataAttributeAttr cudaAttr, mlir::ValueRange typeparams,
- mlir::ValueRange shape, llvm::ArrayRef<mlir::NamedAttribute> attributes) {
- mlir::StringAttr nameAttr =
- uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName);
- mlir::StringAttr bindcAttr =
- bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName);
- build(builder, result, wrapAllocaResultType(inType),
- mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape,
- cudaAttr);
- result.addAttributes(attributes);
-}
-
-template <typename Op>
-static mlir::LogicalResult checkCudaAttr(Op op) {
- if (op.getCudaAttr() == fir::CUDADataAttribute::Device ||
- op.getCudaAttr() == fir::CUDADataAttribute::Managed ||
- op.getCudaAttr() == fir::CUDADataAttribute::Unified)
- return mlir::success();
- return op.emitOpError("expect device, managed or unified cuda attribute");
-}
-
-mlir::LogicalResult fir::CUDAAllocOp::verify() { return checkCudaAttr(*this); }
-
-mlir::LogicalResult fir::CUDAFreeOp::verify() { return checkCudaAttr(*this); }
-
//===----------------------------------------------------------------------===//
// FIROpsDialect
//===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt
index dc9e080b0f8be..5004d74ffc7d3 100644
--- a/flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt
@@ -5,11 +5,13 @@ add_flang_library(HLFIRDialect
HLFIROps.cpp
DEPENDS
+ CUFAttrs
FIRDialect
HLFIROpsIncGen
${dialect_libs}
LINK_LIBS
+ CUFAttrs
FIRDialect
MLIRIR
${dialect_libs}
diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
index c232ae165d4c8..11196353b07c7 100644
--- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
+++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
@@ -127,14 +127,14 @@ void hlfir::DeclareOp::build(mlir::OpBuilder &builder,
mlir::ValueRange typeparams,
mlir::Value dummy_scope,
fir::FortranVariableFlagsAttr fortran_attrs,
- fir::CUDADataAttributeAttr cuda_attr) {
+ cuf::DataAttributeAttr data_attr) {
auto nameAttr = builder.getStringAttr(uniq_name);
mlir::Type inputType = memref.getType();
bool hasExplicitLbs = hasExplicitLowerBounds(shape);
mlir::Type hlfirVariableType =
getHLFIRVariableType(inputType, hasExplicitLbs);
build(builder, result, {hlfirVariableType, inputType}, memref, shape,
- typeparams, dummy_scope, nameAttr, fortran_attrs, cuda_attr);
+ typeparams, dummy_scope, nameAttr, fortran_attrs, data_attr);
}
mlir::LogicalResult hlfir::DeclareOp::verify() {
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index ad569ce3b41f1..8292b9371f88f 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -12,10 +12,12 @@ add_flang_library(HLFIRTransforms
DEPENDS
FIRDialect
+ CUFAttrs
HLFIROpsIncGen
${dialect_libs}
LINK_LIBS
+ CUFAttrs
FIRAnalysis
FIRDialect
FIRBuilder
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
index 3570e0011ca7e..e56595d1c8e23 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp
@@ -320,16 +320,16 @@ class DeclareOpConversion : public mlir::OpRewritePattern<hlfir::DeclareOp> {
mlir::Location loc = declareOp->getLoc();
mlir::Value memref = declareOp.getMemref();
fir::FortranVariableFlagsAttr fortranAttrs;
- fir::CUDADataAttributeAttr cudaAttr;
+ cuf::DataAttributeAttr dataAttr;
if (auto attrs = declareOp.getFortranAttrs())
fortranAttrs =
fir::FortranVariableFlagsAttr::get(rewriter.getContext(), *attrs);
- if (auto attr = declareOp.getCudaAttr())
- cudaAttr = fir::CUDADataAttributeAttr::get(rewriter.getContext(), *attr);
+ if (auto attr = declareOp.getDataAttr())
+ dataAttr = cuf::DataAttributeAttr::get(rewriter.getContext(), *attr);
auto firDeclareOp = rewriter.create<fir::DeclareOp>(
loc, memref.getType(), memref, declareOp.getShape(),
declareOp.getTypeparams(), declareOp.getDummyScope(),
- declareOp.getUniqName(), fortranAttrs, cudaAttr);
+ declareOp.getUniqName(), fortranAttrs, dataAttr);
// Propagate other attributes from hlfir.declare to fir.declare.
// OpenACC's acc.declare is one example. Right now, the propagation
diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir
index 5a12e3c1a4bf2..6e18e48ac82fc 100644
--- a/flang/test/Fir/cuf-invalid.fir
+++ b/flang/test/Fir/cuf-invalid.fir
@@ -4,11 +4,11 @@ func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%1 = fir.alloca i32
%pinned = fir.alloca i1
- %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%s = fir.load %1 : !fir.ref<i32>
- // expected-error at +1{{'fir.cuda_allocate' op pinned and stream cannot appears at the same time}}
- %13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) pinned(%pinned : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
+ // expected-error at +1{{'cuf.allocate' op pinned and stream cannot appears at the same time}}
+ %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) pinned(%pinned : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
return
}
@@ -16,8 +16,8 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%1 = fir.alloca i32
- // expected-error at +1{{'fir.cuda_allocate' op expect box to be a reference to a class or box type value}}
- %2 = fir.cuda_allocate %1 : !fir.ref<i32> {cuda_attr = #fir.cuda<device>} -> i32
+ // expected-error at +1{{'cuf.allocate' op expect box to be a reference to a class or box type value}}
+ %2 = cuf.allocate %1 : !fir.ref<i32> {data_attr = #cuf.cuda<device>} -> i32
return
}
@@ -25,15 +25,15 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
- %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%c100 = arith.constant 100 : index
%7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
%8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
%9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
- // expected-error at +1{{'fir.cuda_allocate' op expect stat attribute when errmsg is provided}}
- %13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>} -> i32
+ // expected-error at +1{{'cuf.allocate' op expect stat attribute when errmsg is provided}}
+ %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>} -> i32
return
}
@@ -41,11 +41,11 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
- %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%1 = fir.alloca i32
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
- // expected-error at +1{{'fir.cuda_allocate' op expect errmsg to be a reference to/or a box type value}}
- %13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+ // expected-error at +1{{'cuf.allocate' op expect errmsg to be a reference to/or a box type value}}
+ %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
return
}
@@ -53,8 +53,8 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%1 = fir.alloca i32
- // expected-error at +1{{'fir.cuda_deallocate' op expect box to be a reference to class or box type value}}
- %2 = fir.cuda_deallocate %1 : !fir.ref<i32> {cuda_attr = #fir.cuda<device>} -> i32
+ // expected-error at +1{{'cuf.deallocate' op expect box to be a reference to class or box type value}}
+ %2 = cuf.deallocate %1 : !fir.ref<i32> {data_attr = #cuf.cuda<device>} -> i32
return
}
@@ -62,11 +62,11 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
- %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%1 = fir.alloca i32
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
- // expected-error at +1{{'fir.cuda_deallocate' op expect errmsg to be a reference to/or a box type value}}
- %13 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+ // expected-error at +1{{'cuf.deallocate' op expect errmsg to be a reference to/or a box type value}}
+ %13 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%1 : !fir.ref<i32>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
return
}
@@ -74,32 +74,32 @@ func.func @_QPsub1() {
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
- %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%c100 = arith.constant 100 : index
%7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
%8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
%9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
- // expected-error at +1{{'fir.cuda_deallocate' op expect stat attribute when errmsg is provided}}
- %13 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>} -> i32
+ // expected-error at +1{{'cuf.deallocate' op expect stat attribute when errmsg is provided}}
+ %13 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>} -> i32
return
}
// -----
func.func @_QPsub1() {
- // expected-error at +1{{'fir.cuda_alloc' op expect device, managed or unified cuda attribute}}
- %0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda<pinned>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
- fir.cuda_free %0 : !fir.ref<f32> {cuda_attr = #fir.cuda<constant>}
+ // expected-error at +1{{'cuf.alloc' op expect device, managed or unified cuda attribute}}
+ %0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<pinned>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
+ cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<constant>}
return
}
// -----
func.func @_QPsub1() {
- %0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
- // expected-error at +1{{'fir.cuda_free' op expect device, managed or unified cuda attribute}}
- fir.cuda_free %0 : !fir.ref<f32> {cuda_attr = #fir.cuda<constant>}
+ %0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
+ // expected-error at +1{{'cuf.free' op expect device, managed or unified cuda attribute}}
+ cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<constant>}
return
}
diff --git a/flang/test/Fir/cuf.mlir b/flang/test/Fir/cuf.mlir
index 8e2346def43ea..188044d04b848 100644
--- a/flang/test/Fir/cuf.mlir
+++ b/flang/test/Fir/cuf.mlir
@@ -4,85 +4,85 @@
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
- %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
- %13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
- %14 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
+ %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
+ %14 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
return
}
-// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
-// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref<!fir.box<none>> {cuda_attr = #fir.cuda<device>} -> i32
+// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
+// CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<none>> {data_attr = #cuf.cuda<device>} -> i32
// -----
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%1 = fir.alloca i32
- %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%s = fir.load %1 : !fir.ref<i32>
- %13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) {cuda_attr = #fir.cuda<device>} -> i32
+ %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> stream(%s : i32) {data_attr = #cuf.cuda<device>} -> i32
return
}
-// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> stream(%{{.*}} : i32) {cuda_attr = #fir.cuda<device>} -> i32
+// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> stream(%{{.*}} : i32) {data_attr = #cuf.cuda<device>} -> i32
// -----
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%1 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub1Eb"}
- %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%5:2 = hlfir.declare %1 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%12 = fir.convert %5#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
- %13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> source(%12 : !fir.ref<!fir.box<none>>) {cuda_attr = #fir.cuda<device>} -> i32
+ %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> source(%12 : !fir.ref<!fir.box<none>>) {data_attr = #cuf.cuda<device>} -> i32
return
}
-// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> source(%{{.*}} : !fir.ref<!fir.box<none>>) {cuda_attr = #fir.cuda<device>} -> i32
+// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> source(%{{.*}} : !fir.ref<!fir.box<none>>) {data_attr = #cuf.cuda<device>} -> i32
// -----
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
%pinned = fir.alloca i1
- %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
- %13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> pinned(%pinned : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
+ %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> pinned(%pinned : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
return
}
-// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> pinned(%{{.*}} : !fir.ref<i1>) {cuda_attr = #fir.cuda<device>} -> i32
+// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> pinned(%{{.*}} : !fir.ref<i1>) {data_attr = #cuf.cuda<device>} -> i32
// -----
func.func @_QPsub1() {
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
- %4:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+ %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
%c100 = arith.constant 100 : index
%7 = fir.alloca !fir.char<1,100> {bindc_name = "msg", uniq_name = "_QFsub1Emsg"}
%8:2 = hlfir.declare %7 typeparams %c100 {uniq_name = "_QFsub1Emsg"} : (!fir.ref<!fir.char<1,100>>, index) -> (!fir.ref<!fir.char<1,100>>, !fir.ref<!fir.char<1,100>>)
%9 = fir.embox %8#1 : (!fir.ref<!fir.char<1,100>>) -> !fir.box<!fir.char<1,100>>
%11 = fir.convert %4#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>>
%16 = fir.convert %9 : (!fir.box<!fir.char<1,100>>) -> !fir.box<none>
- %13 = fir.cuda_allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
- %14 = fir.cuda_deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+ %13 = cuf.allocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
+ %14 = cuf.deallocate %11 : !fir.ref<!fir.box<none>> errmsg(%16 : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
return
}
-// CHECK: fir.cuda_allocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
-// CHECK: fir.cuda_deallocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+// CHECK: cuf.allocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
+// CHECK: cuf.deallocate %{{.*}} : !fir.ref<!fir.box<none>> errmsg(%{{.*}} : !fir.box<none>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
// -----
func.func @_QPsub1() {
- %0 = fir.cuda_alloc f32 {bindc_name = "r", cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
- fir.cuda_free %0 : !fir.ref<f32> {cuda_attr = #fir.cuda<device>}
+ %0 = cuf.alloc f32 {bindc_name = "r", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Er"} -> !fir.ref<f32>
+ cuf.free %0 : !fir.ref<f32> {data_attr = #cuf.cuda<device>}
return
}
-// CHECK: fir.cuda_alloc
-// CHECK: fir.cuda_free
+// CHECK: cuf.alloc
+// CHECK: cuf.free
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index eff5f13669e90..74a3ec100a8f2 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -11,11 +11,11 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub1Ea"}
-! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
-! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
-! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: %[[BOX_LOAD:.*]] = fir.load %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: %[[ADDR:.*]] = fir.box_addr %[[BOX_LOAD]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
@@ -23,7 +23,7 @@ end subroutine
! CHECK: %[[C0:.*]] = arith.constant 0 : i64
! CHECK: %[[NE_C0:.*]] = arith.cmpi ne, %[[ADDR_I64]], %[[C0]] : i64
! CHECK: fir.if %[[NE_C0]] {
-! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
subroutine sub2()
@@ -36,18 +36,18 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub2()
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub2Ea"}
-! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub2Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"}
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
-! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
+! CHECK: %[[STAT:.*]] = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
-! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>, hasStat} -> i32
+! CHECK: %[[STAT:.*]] = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
! CHECK: fir.if %{{.*}} {
-! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<managed>} -> i32
+! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<managed>} -> i32
! CHECK: }
subroutine sub3()
@@ -58,13 +58,13 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub3()
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi32>>> {bindc_name = "a", uniq_name = "_QFsub3Ea"}
-! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub3Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>>)
! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"}
! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
! CHECK-2: fir.call @_FortranAAllocatableSetBounds
-! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref<!fir.logical<4>>) {cuda_attr = #fir.cuda<pinned>} -> i32
+! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref<!fir.logical<4>>) {data_attr = #cuf.cuda<pinned>} -> i32
! CHECK: fir.if %{{.*}} {
-! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {cuda_attr = #fir.cuda<pinned>} -> i32
+! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi32>>>> {data_attr = #cuf.cuda<pinned>} -> i32
! CHECK: }
subroutine sub4()
@@ -75,14 +75,14 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub4()
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub4Ea"}
-! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub4Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"}
! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.call @_FortranAAllocatableSetBounds
! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref<i32>
-! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> stream(%[[STREAM]] : i32) {data_attr = #cuf.cuda<device>} -> i32
! CHECK: fir.if %{{.*}} {
-! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
subroutine sub5()
@@ -93,16 +93,16 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub5()
! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub5Ea"}
-! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub5Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub5Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.call @_FortranAAllocatableSetBounds
-! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> source(%[[LOAD_B]] : !fir.box<!fir.heap<!fir.array<?xf32>>>) {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> source(%[[LOAD_B]] : !fir.box<!fir.heap<!fir.array<?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32
! CHECK: fir.if
! CHECK: fir.freemem
! CHECK: fir.if %{{.*}} {
-! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
subroutine sub6()
@@ -113,14 +113,14 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub6()
! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub6Ea"}
-! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "b", uniq_name = "_QFsub6Eb"}
! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub6Eb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
! CHECK: fir.call @_FortranAAllocatableApplyMold
-! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.allocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: fir.if %{{.*}} {
-! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.deallocate %[[BOX_A_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
subroutine sub7()
@@ -134,19 +134,19 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub7()
! CHECK: %[[BOX:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub7Ea"}
-! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub7Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"}
! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref<!fir.char<1,50>>, index) -> (!fir.ref<!fir.char<1,50>>, !fir.ref<!fir.char<1,50>>)
! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub7Eistat"}
! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub7Eistat"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
! CHECK: fir.call @_FortranAAllocatableSetBounds
-! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+! CHECK: %[[STAT:.*]] = cuf.allocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%[[ERR_BOX]] : !fir.box<!fir.char<1,50>>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref<!fir.char<1,50>>) -> !fir.box<!fir.char<1,50>>
-! CHECK: %[[STAT:.*]] = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {cuda_attr = #fir.cuda<device>, hasStat} -> i32
+! CHECK: %[[STAT:.*]] = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> errmsg(%15 : !fir.box<!fir.char<1,50>>) {data_attr = #cuf.cuda<device>, hasStat} -> i32
! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref<i32>
! CHECK: fir.if %{{.*}} {
-! CHECK: %{{.*}} = fir.cuda_deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {cuda_attr = #fir.cuda<device>} -> i32
+! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} -> i32
! CHECK: }
diff --git a/flang/test/Lower/CUDA/cuda-data-attribute.cuf b/flang/test/Lower/CUDA/cuda-data-attribute.cuf
index 3eb42a6a5d40f..f7f58a43a1439 100644
--- a/flang/test/Lower/CUDA/cuda-data-attribute.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-attribute.cuf
@@ -5,13 +5,13 @@
module cuda_var
real, constant :: mod_a_rc
-! CHECK: fir.global @_QMcuda_varEmod_a_rc {cuda_attr = #fir.cuda<constant>} : f32
+! CHECK: fir.global @_QMcuda_varEmod_a_rc {data_attr = #cuf.cuda<constant>} : f32
real, device :: mod_b_ra
-! CHECK: fir.global @_QMcuda_varEmod_b_ra {cuda_attr = #fir.cuda<device>} : f32
+! CHECK: fir.global @_QMcuda_varEmod_b_ra {data_attr = #cuf.cuda<device>} : f32
real, allocatable, managed :: mod_c_rm
-! CHECK: fir.global @_QMcuda_varEmod_c_rm {cuda_attr = #fir.cuda<managed>} : !fir.box<!fir.heap<f32>>
+! CHECK: fir.global @_QMcuda_varEmod_c_rm {data_attr = #cuf.cuda<managed>} : !fir.box<!fir.heap<f32>>
real, allocatable, pinned :: mod_d_rp
-! CHECK: fir.global @_QMcuda_varEmod_d_rp {cuda_attr = #fir.cuda<pinned>} : !fir.box<!fir.heap<f32>>
+! CHECK: fir.global @_QMcuda_varEmod_d_rp {data_attr = #cuf.cuda<pinned>} : !fir.box<!fir.heap<f32>>
contains
@@ -23,44 +23,44 @@ subroutine local_var_attrs
end subroutine
! CHECK-LABEL: func.func @_QMcuda_varPlocal_var_attrs()
-! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
-! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
-! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
+! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
+! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> !fir.ref<f32>
-! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
-! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
-! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> !fir.ref<f32>
+! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref<f32>) -> !fir.ref<f32>
+! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
+! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> !fir.ref<!fir.box<!fir.heap<f32>>>
+! FIR: %{{.*}} = fir.declare %{{.*}} {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref<f32>) -> !fir.ref<f32>
subroutine dummy_arg_device(dd)
real, device :: dd
end subroutine
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_device(
-! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "dd", fir.cuda_attr = #fir.cuda<device>}) {
-! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFdummy_arg_deviceEdd"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "dd"}) {
+! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFdummy_arg_deviceEdd"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
subroutine dummy_arg_managed(dm)
real, allocatable, managed :: dm
end subroutine
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_managed(
-! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {fir.bindc_name = "dm", fir.cuda_attr = #fir.cuda<managed>}) {
-! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_managedEdm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {cuf.data_attr = #cuf.cuda<managed>, fir.bindc_name = "dm"}) {
+! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_managedEdm"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
subroutine dummy_arg_pinned(dp)
real, allocatable, pinned :: dp
end subroutine
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_pinned(
-! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {fir.bindc_name = "dp", fir.cuda_attr = #fir.cuda<pinned>}) {
-! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_pinnedEdp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<f32>>> {cuf.data_attr = #cuf.cuda<pinned>, fir.bindc_name = "dp"}) {
+! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<pinned>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMcuda_varFdummy_arg_pinnedEdp"} : (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
subroutine dummy_arg_unified(du)
real, unified :: du
end subroutine
! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_unified(
-! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "du", fir.cuda_attr = #fir.cuda<unified>})
-! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFdummy_arg_unifiedEdu"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {cuf.data_attr = #cuf.cuda<unified>, fir.bindc_name = "du"})
+! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFdummy_arg_unifiedEdu"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
subroutine cuda_alloc_free(n)
integer :: n
@@ -70,27 +70,27 @@ subroutine cuda_alloc_free(n)
end
! CHECK-LABEL: func.func @_QMcuda_varPcuda_alloc_free
-! CHECK: %[[ALLOC_A:.*]] = fir.cuda_alloc !fir.array<10xf32> {bindc_name = "a", cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref<!fir.array<10xf32>>
+! CHECK: %[[ALLOC_A:.*]] = cuf.alloc !fir.array<10xf32> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref<!fir.array<10xf32>>
! CHECK: %[[SHAPE:.*]] = fir.shape %c10 : (index) -> !fir.shape<1>
-! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOC_A]](%[[SHAPE]]) {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
+! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOC_A]](%[[SHAPE]]) {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} : (!fir.ref<!fir.array<10xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xf32>>, !fir.ref<!fir.array<10xf32>>)
-! CHECK: %[[ALLOC_U:.*]] = fir.cuda_alloc i32 {bindc_name = "u", cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} -> !fir.ref<i32>
-! CHECK: %[[DECL_U:.*]]:2 = hlfir.declare %[[ALLOC_U]] {cuda_attr = #fir.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[ALLOC_U:.*]] = cuf.alloc i32 {bindc_name = "u", data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} -> !fir.ref<i32>
+! CHECK: %[[DECL_U:.*]]:2 = hlfir.declare %[[ALLOC_U]] {data_attr = #cuf.cuda<unified>, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[ALLOC_B:.*]] = fir.cuda_alloc !fir.array<?xf32>, %{{.*}} : index {bindc_name = "b", cuda_attr = #fir.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[ALLOC_B:.*]] = cuf.alloc !fir.array<?xf32>, %{{.*}} : index {bindc_name = "b", data_attr = #cuf.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} -> !fir.ref<!fir.array<?xf32>>
! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
-! CHECK: %[[DECL_B:.*]]:2 = hlfir.declare %[[ALLOC_B]](%[[SHAPE]]) {cuda_attr = #fir.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[DECL_B:.*]]:2 = hlfir.declare %[[ALLOC_B]](%[[SHAPE]]) {data_attr = #cuf.cuda<managed>, uniq_name = "_QMcuda_varFcuda_alloc_freeEb"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
-! CHECK: fir.cuda_free %[[DECL_B]]#1 : !fir.ref<!fir.array<?xf32>> {cuda_attr = #fir.cuda<managed>}
-! CHECK: fir.cuda_free %[[DECL_U]]#1 : !fir.ref<i32> {cuda_attr = #fir.cuda<unified>}
-! CHECK: fir.cuda_free %[[DECL_A]]#1 : !fir.ref<!fir.array<10xf32>> {cuda_attr = #fir.cuda<device>}
+! CHECK: cuf.free %[[DECL_B]]#1 : !fir.ref<!fir.array<?xf32>> {data_attr = #cuf.cuda<managed>}
+! CHECK: cuf.free %[[DECL_U]]#1 : !fir.ref<i32> {data_attr = #cuf.cuda<unified>}
+! CHECK: cuf.free %[[DECL_A]]#1 : !fir.ref<!fir.array<10xf32>> {data_attr = #cuf.cuda<device>}
subroutine dummy(x)
real, target, device :: x
end subroutine
! CHECK: func.func @_QMcuda_varPdummy
-! CHECK-NOT: fir.cuda_free
+! CHECK-NOT: cuf.free
end module
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 0a2608639bce7..084314ed63ecd 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -29,37 +29,37 @@ end
! CHECK-LABEL: func.func @_QPsub1()
-! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub1Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[C1:.*]] = arith.constant 1 : i32
! CHECK: %[[LOADED_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
! CHECK: %[[ADD:.*]] = arith.addi %[[C1]], %[[LOADED_I]] : i32
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ADD]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
-! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
+! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<i32>, i1
! CHECK: %[[C1:.*]] = arith.constant 1 : i32
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[C1]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
-! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
+! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<i32>, i1
-! CHECK: fir.cuda_data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+! CHECK: cuf.data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> {
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, i1)
-! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<!fir.array<10xi32>>, i1
! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%c1{{.*}}:%c5{{.*}}:%c1{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
-! CHECK: fir.cuda_data_transfer %[[DES_AHOST]] to %[[DES_ADEV]] {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
+! CHECK: cuf.data_transfer %[[DES_AHOST]] to %[[DES_ADEV]] {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32>
! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>, i1)
-! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<!fir.array<10xi32>>, i1
subroutine sub2()
@@ -81,25 +81,25 @@ subroutine sub2()
end
! CHECK-LABEL: func.func @_QPsub2()
-! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
-! CHECK: %[[BDEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Ebdev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+! CHECK: %[[BDEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Ebdev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Ebhost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub2Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: fir.cuda_data_transfer %[[ADEV]]#0 to %[[AHOST]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
-! CHECK: fir.cuda_data_transfer %[[M]]#0 to %[[I]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
+! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Em"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: cuf.data_transfer %[[ADEV]]#0 to %[[AHOST]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+! CHECK: cuf.data_transfer %[[M]]#0 to %[[I]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
! CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.ref<!fir.array<10xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<5xi32>>
-! CHECK: fir.cuda_data_transfer %[[DES_ADEV]] to %[[DES_AHOST]] {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
+! CHECK: cuf.data_transfer %[[DES_ADEV]] to %[[DES_AHOST]] {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<5xi32>>, !fir.ref<!fir.array<5xi32>>
-! CHECK: fir.cuda_data_transfer %[[ADEV]]#0 to %[[BDEV]]#0 {transfer_kind = #fir.cuda_transfer<device_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+! CHECK: cuf.data_transfer %[[ADEV]]#0 to %[[BDEV]]#0 {transfer_kind = #cuf.cuda_transfer<device_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""}
! CHECK: %[[DECL_TEMP:.*]]:2 = hlfir.declare %[[TEMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
-! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
-! CHECK: fir.cuda_data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>
+! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %[[DECL_TEMP]]#1(%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub2Eadev"} : (!fir.heap<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>)
+! CHECK: cuf.data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<10xi32>>, !fir.heap<!fir.array<10xi32>>
! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32>
! CHECK: hlfir.assign %[[ELEMENTAL]] to %[[BHOST]]#0 : !hlfir.expr<10xi32>, !fir.ref<!fir.array<10xi32>>
! CHECK: fir.freemem %[[DECL_TEMP]]#0 : !fir.heap<!fir.array<10xi32>>
@@ -116,12 +116,12 @@ end
! CHECK: %[[TMP:.*]] = fir.alloca !fir.type<_QMmod1Tt1{i:i32}> {bindc_name = ".tmp"}
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Ebhost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
-! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {cuda_attr = #fir.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
+! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
-! CHECK: fir.cuda_data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #fir.cuda_transfer<device_host>} : !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>
+! CHECK: cuf.data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>
-! Check that fir.cuda_data_transfer are not generated within cuf kernel
+! Check that cuf.data_transfer are not generated within cuf kernel
subroutine sub4()
integer, parameter :: n = 10
real, device :: adev(n)
@@ -137,9 +137,9 @@ subroutine sub4()
end subroutine
! CHECK-LABEL: func.func @_QPsub4()
-! CHECK: fir.cuda_data_transfer
-! CHECK: fir.cuda_kernel<<<*, *>>>
-! CHECK-NOT: fir.cuda_data_transfer
+! CHECK: cuf.data_transfer
+! CHECK: cuf.kernel<<<*, *>>>
+! CHECK-NOT: cuf.data_transfer
! CHECK: hlfir.assign
attributes(global) subroutine sub5(a)
@@ -149,7 +149,7 @@ attributes(global) subroutine sub5(a)
end subroutine
! CHECK-LABEL: func.func @_QPsub5
-! CHECK-NOT: fir.cuda_data_transfer
+! CHECK-NOT: cuf.data_transfer
attributes(host,device) subroutine sub6(a)
integer, device :: a
@@ -158,4 +158,4 @@ attributes(host,device) subroutine sub6(a)
end subroutine
! CHECK-LABEL: func.func @_QPsub6
-! CHECK: fir.cuda_data_transfer
+! CHECK: cuf.data_transfer
diff --git a/flang/test/Lower/CUDA/cuda-kernel-calls.cuf b/flang/test/Lower/CUDA/cuda-kernel-calls.cuf
index 7e28fbb2231a2..82d1a61f8e157 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-calls.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-calls.cuf
@@ -16,10 +16,10 @@ contains
subroutine host()
real, device :: a
! CHECK-LABEL: func.func @_QMtest_callPhost()
-! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda<device>, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[A:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMtest_callFhostEa"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
call dev_kernel0<<<10, 20>>>()
-! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}>>>()
+! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}>>>()
call dev_kernel0<<< __builtin_dim3(1,1,4), __builtin_dim3(32,1,1) >>>
! CHECK: %[[ADDR_DIM3_GRID:.*]] = fir.address_of(@_QQro._QM__fortran_builtinsT__builtin_dim3.{{.*}}) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
@@ -38,16 +38,16 @@ contains
! CHECK: %[[BLOCK_Y_LOAD:.*]] = fir.load %[[BLOCK_Y]] : !fir.ref<i32>
! CHECK: %[[BLOCK_Z:.*]] = hlfir.designate %[[DIM3_BLOCK]]#1{"z"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
! CHECK: %[[BLOCK_Z_LOAD:.*]] = fir.load %[[BLOCK_Z]] : !fir.ref<i32>
-! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%[[GRID_X_LOAD]], %[[GRID_Y_LOAD]], %[[GRID_Z_LOAD]], %[[BLOCK_X_LOAD]], %[[BLOCK_Y_LOAD]], %[[BLOCK_Z_LOAD]]>>>()
+! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%[[GRID_X_LOAD]], %[[GRID_Y_LOAD]], %[[GRID_Z_LOAD]], %[[BLOCK_X_LOAD]], %[[BLOCK_Y_LOAD]], %[[BLOCK_Z_LOAD]]>>>()
call dev_kernel0<<<10, 20, 2>>>()
-! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}>>>()
+! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}>>>()
call dev_kernel0<<<10, 20, 2, 0>>>()
-! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}, %c0{{.*}}>>>()
+! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel0<<<%c10{{.*}}, %c1{{.*}}, %c1{{.*}}, %c20{{.*}}, %c1{{.*}}, %c1{{.*}}, %c2{{.*}}, %c0{{.*}}>>>()
call dev_kernel1<<<1, 32>>>(a)
-! CHECK: fir.cuda_kernel_launch @_QMtest_callPdev_kernel1<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}>>>(%1#1) : (!fir.ref<f32>)
+! CHECK: cuf.kernel_launch @_QMtest_callPdev_kernel1<<<%c1{{.*}}, %c1{{.*}}, %c1{{.*}}, %c32{{.*}}, %c1{{.*}}, %c1{{.*}}>>>(%1#1) : (!fir.ref<f32>)
end
end
diff --git a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
index e1cc35772618a..89de367b723f5 100644
--- a/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
+++ b/flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf
@@ -20,7 +20,7 @@ subroutine sub1()
! CHECK: %[[LB:.*]] = fir.convert %c1{{.*}} : (i32) -> index
! CHECK: %[[UB:.*]] = fir.convert %c100{{.*}} : (i32) -> index
! CHECK: %[[STEP:.*]] = arith.constant 1 : index
-! CHECK: fir.cuda_kernel<<<%c1_i32, %c2_i32>>> (%[[ARG0:.*]] : index) = (%[[LB]] : index) to (%[[UB]] : index) step (%[[STEP]] : index)
+! CHECK: cuf.kernel<<<%c1_i32, %c2_i32>>> (%[[ARG0:.*]] : index) = (%[[LB]] : index) to (%[[UB]] : index) step (%[[STEP]] : index)
! CHECK-NOT: fir.do_loop
! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32
! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32>
@@ -32,7 +32,7 @@ subroutine sub1()
a(i) = a(i) * b(i)
end do
-! CHECK: fir.cuda_kernel<<<*, *>>> (%{{.*}} : index) = (%{{.*}} : index) to (%{{.*}} : index) step (%{{.*}} : index)
+! CHECK: cuf.kernel<<<*, *>>> (%{{.*}} : index) = (%{{.*}} : index) to (%{{.*}} : index) step (%{{.*}} : index)
!$cuf kernel do(2) <<< 1, (256,1) >>>
do i = 1, n
@@ -41,7 +41,7 @@ subroutine sub1()
end do
end do
-! CHECK: fir.cuda_kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
+! CHECK: cuf.kernel<<<%c1{{.*}}, (%c256{{.*}}, %c1{{.*}})>>> (%[[ARG0:.*]] : index, %[[ARG1:.*]] : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
! CHECK: %[[ARG0_I32:.*]] = fir.convert %[[ARG0]] : (index) -> i32
! CHECK: fir.store %[[ARG0_I32]] to %[[IV]]#1 : !fir.ref<i32>
! CHECK: %[[ARG1_I32:.*]] = fir.convert %[[ARG1]] : (index) -> i32
@@ -54,7 +54,7 @@ subroutine sub1()
c(i,j) = c(i,j) * d(i,j)
end do
end do
-! CHECK: fir.cuda_kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
+! CHECK: cuf.kernel<<<(%c1{{.*}}, %c0{{.*}}), (%c256{{.*}}, %c1{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
!$cuf kernel do(2) <<< (*,*), (32,4) >>>
do i = 1, n
@@ -63,5 +63,5 @@ subroutine sub1()
end do
end do
-! CHECK: fir.cuda_kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
+! CHECK: cuf.kernel<<<*, (%c32{{.*}}, %c4{{.*}})>>> (%{{.*}} : index, %{{.*}} : index) = (%{{.*}}, %{{.*}} : index, index) to (%{{.*}}, %{{.*}} : index, index) step (%{{.*}}, %{{.*}} : index, index)
end
diff --git a/flang/test/Lower/CUDA/cuda-mod.cuf b/flang/test/Lower/CUDA/cuda-mod.cuf
index ae5bf63d2da49..f03e72e94780f 100644
--- a/flang/test/Lower/CUDA/cuda-mod.cuf
+++ b/flang/test/Lower/CUDA/cuda-mod.cuf
@@ -10,6 +10,6 @@ contains
end
end module
-! CHECK: fir.global @_QMcuf_modEmd {cuda_attr = #fir.cuda<device>} : f32
+! CHECK: fir.global @_QMcuf_modEmd {data_attr = #cuf.cuda<device>} : f32
-! CHECK: func.func @_QMcuf_modPdevsub() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
+! CHECK: func.func @_QMcuf_modPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
diff --git a/flang/test/Lower/CUDA/cuda-module-use.cuf b/flang/test/Lower/CUDA/cuda-module-use.cuf
index 47d3805065a5a..130fefab24d90 100644
--- a/flang/test/Lower/CUDA/cuda-module-use.cuf
+++ b/flang/test/Lower/CUDA/cuda-module-use.cuf
@@ -10,16 +10,16 @@ end
! CHECK-LABEL: func.func @_QPsub1()
! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMcuf_modEmd) : !fir.ref<f32>
-! CHECK: %{{.*}}:2 = hlfir.declare %[[ADDR]] {cuda_attr = #fir.cuda<device>, uniq_name = "_QMcuf_modEmd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %{{.*}}:2 = hlfir.declare %[[ADDR]] {data_attr = #cuf.cuda<device>, uniq_name = "_QMcuf_modEmd"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
attributes(device) subroutine sub2()
use cuf_mod
call devsub()
end
-! CHECK-LABEL: func.func @_QPsub2() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
+! CHECK-LABEL: func.func @_QPsub2() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
! CHECK: fir.call @_QMcuf_modPdevsub()
-! CHECK-LABEL: fir.global @_QMcuf_modEmd {cuda_attr = #fir.cuda<device>} : f32
+! CHECK-LABEL: fir.global @_QMcuf_modEmd {data_attr = #cuf.cuda<device>} : f32
-! CHECK-LABEL: func.func private @_QMcuf_modPdevsub() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
+! CHECK-LABEL: func.func private @_QMcuf_modPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
diff --git a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf
index d9765f6cd2fe8..f8b8dd8e296b4 100644
--- a/flang/test/Lower/CUDA/cuda-proc-attribute.cuf
+++ b/flang/test/Lower/CUDA/cuda-proc-attribute.cuf
@@ -4,40 +4,40 @@
! Test lowering of CUDA attribute on procedures.
attributes(host) subroutine sub_host(); end
-! CHECK: func.func @_QPsub_host() attributes {fir.cuda_attr = #fir.cuda_proc<host>}
+! CHECK: func.func @_QPsub_host() attributes {cuf.proc_attr = #cuf.cuda_proc<host>}
attributes(device) subroutine sub_device(); end
-! CHECK: func.func @_QPsub_device() attributes {fir.cuda_attr = #fir.cuda_proc<device>}
+! CHECK: func.func @_QPsub_device() attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
attributes(host) attributes(device) subroutine sub_host_device; end
-! CHECK: func.func @_QPsub_host_device() attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
+! CHECK: func.func @_QPsub_host_device() attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(device) attributes(host) subroutine sub_device_host; end
-! CHECK: func.func @_QPsub_device_host() attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
+! CHECK: func.func @_QPsub_device_host() attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(global) subroutine sub_global(); end
-! CHECK: func.func @_QPsub_global() attributes {fir.cuda_attr = #fir.cuda_proc<global>}
+! CHECK: func.func @_QPsub_global() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
attributes(grid_global) subroutine sub_grid_global(); end
-! CHECK: func.func @_QPsub_grid_global() attributes {fir.cuda_attr = #fir.cuda_proc<grid_global>}
+! CHECK: func.func @_QPsub_grid_global() attributes {cuf.proc_attr = #cuf.cuda_proc<grid_global>}
attributes(host) integer function fct_host(); end
-! CHECK: func.func @_QPfct_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host>}
+! CHECK: func.func @_QPfct_host() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<host>}
attributes(device) integer function fct_device(); end
-! CHECK: func.func @_QPfct_device() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<device>}
+! CHECK: func.func @_QPfct_device() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<device>}
attributes(host) attributes(device) integer function fct_host_device; end
-! CHECK: func.func @_QPfct_host_device() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
+! CHECK: func.func @_QPfct_host_device() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(device) attributes(host) integer function fct_device_host; end
-! CHECK: func.func @_QPfct_device_host() -> i32 attributes {fir.cuda_attr = #fir.cuda_proc<host_device>}
+! CHECK: func.func @_QPfct_device_host() -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<host_device>}
attributes(global) launch_bounds(1, 2) subroutine sub_lbounds1(); end
-! CHECK: func.func @_QPsub_lbounds1() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64>}
+! CHECK: func.func @_QPsub_lbounds1() attributes {cuf.launch_bounds = #cuf.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64>, cuf.proc_attr = #cuf.cuda_proc<global>}
attributes(global) launch_bounds(1, 2, 3) subroutine sub_lbounds2(); end
-! CHECK: func.func @_QPsub_lbounds2() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_launch_bounds = #fir.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64, upperBoundClusterSize = 3 : i64>}
+! CHECK: func.func @_QPsub_lbounds2() attributes {cuf.launch_bounds = #cuf.launch_bounds<maxTPB = 1 : i64, minBPM = 2 : i64, upperBoundClusterSize = 3 : i64>, cuf.proc_attr = #cuf.cuda_proc<global>}
attributes(global) cluster_dims(1, 2, 3) subroutine sub_clusterdims1(); end
-! CHECK: func.func @_QPsub_clusterdims1() attributes {fir.cuda_attr = #fir.cuda_proc<global>, fir.cuda_cluster_dims = #fir.cluster_dims<x = 1 : i64, y = 2 : i64, z = 3 : i64>}
+! CHECK: func.func @_QPsub_clusterdims1() attributes {cuf.cluster_dims = #cuf.cluster_dims<x = 1 : i64, y = 2 : i64, z = 3 : i64>, cuf.proc_attr = #cuf.cuda_proc<global>}
diff --git a/flang/tools/bbc/CMakeLists.txt b/flang/tools/bbc/CMakeLists.txt
index f21fa3b7bae35..9410fd0056600 100644
--- a/flang/tools/bbc/CMakeLists.txt
+++ b/flang/tools/bbc/CMakeLists.txt
@@ -16,6 +16,8 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)
target_link_libraries(bbc PRIVATE
+CUFAttrs
+CUFDialect
FIRDialect
FIRDialectSupport
FIRSupport
diff --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt
index 43b0c74696f57..43679a9d53578 100644
--- a/flang/tools/fir-opt/CMakeLists.txt
+++ b/flang/tools/fir-opt/CMakeLists.txt
@@ -11,6 +11,8 @@ if(FLANG_INCLUDE_TESTS)
endif()
target_link_libraries(fir-opt PRIVATE
+ CUFAttrs
+ CUFDialect
FIRDialect
FIRSupport
FIRTransforms
diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt
index 6d83353b4e0d4..808219ac361f2 100644
--- a/flang/tools/tco/CMakeLists.txt
+++ b/flang/tools/tco/CMakeLists.txt
@@ -7,6 +7,8 @@ llvm_update_compile_flags(tco)
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)
target_link_libraries(tco PRIVATE
+ CUFAttrs
+ CUFDialect
FIRCodeGen
FIRDialect
FIRDialectSupport
diff --git a/flang/unittests/Optimizer/CMakeLists.txt b/flang/unittests/Optimizer/CMakeLists.txt
index 9c165d998e2e1..7299e3ee0529a 100644
--- a/flang/unittests/Optimizer/CMakeLists.txt
+++ b/flang/unittests/Optimizer/CMakeLists.txt
@@ -2,6 +2,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS)
set(LIBS
+ CUFDialect
FIRBuilder
FIRCodeGen
FIRDialect
@@ -36,6 +37,7 @@ add_flang_unittest(FlangOptimizerTests
KindMappingTest.cpp
RTBuilder.cpp
DEPENDS
+ CUFDialect
FIRDialect
FIRSupport
HLFIRDialect
diff --git a/flang/unittests/Optimizer/FortranVariableTest.cpp b/flang/unittests/Optimizer/FortranVariableTest.cpp
index f5f559ef887c8..87efb624735cf 100644
--- a/flang/unittests/Optimizer/FortranVariableTest.cpp
+++ b/flang/unittests/Optimizer/FortranVariableTest.cpp
@@ -51,7 +51,7 @@ TEST_F(FortranVariableTest, SimpleScalar) {
/*shape=*/mlir::Value{}, /*typeParams=*/std::nullopt,
/*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
- /*cuda_attr=*/fir::CUDADataAttributeAttr{});
+ /*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_FALSE(fortranVariable.isArray());
@@ -77,7 +77,7 @@ TEST_F(FortranVariableTest, CharacterScalar) {
auto declare = builder->create<fir::DeclareOp>(loc, addr.getType(), addr,
/*shape=*/mlir::Value{}, typeParams, /*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
- /*cuda_attr=*/fir::CUDADataAttributeAttr{});
+ /*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_FALSE(fortranVariable.isArray());
@@ -108,7 +108,7 @@ TEST_F(FortranVariableTest, SimpleArray) {
auto declare = builder->create<fir::DeclareOp>(loc, addr.getType(), addr,
shape, /*typeParams*/ std::nullopt, /*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
- /*cuda_attr=*/fir::CUDADataAttributeAttr{});
+ /*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_TRUE(fortranVariable.isArray());
@@ -139,7 +139,7 @@ TEST_F(FortranVariableTest, CharacterArray) {
auto declare = builder->create<fir::DeclareOp>(loc, addr.getType(), addr,
shape, typeParams, /*dummy_scope=*/nullptr, name,
/*fortran_attrs=*/fir::FortranVariableFlagsAttr{},
- /*cuda_attr=*/fir::CUDADataAttributeAttr{});
+ /*data_attr=*/cuf::DataAttributeAttr{});
fir::FortranVariableOpInterface fortranVariable = declare;
EXPECT_TRUE(fortranVariable.isArray());
>From 922c110208679e524a7b411489a9e151378726b4 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 15 May 2024 14:12:44 -0700
Subject: [PATCH 2/2] Add missing files
---
.../Dialect/CUF/Attributes/CMakeLists.txt | 7 ++
.../Dialect/CUF/Attributes/CUFAttr.h | 106 ++++++++++++++++++
.../Dialect/CUF/Attributes/CUFAttr.td | 100 +++++++++++++++++
.../Dialect/CUF/Attributes/CMakeLists.txt | 16 +++
.../Dialect/CUF/Attributes/CUFAttr.cpp | 32 ++++++
5 files changed, 261 insertions(+)
create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt
create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
create mode 100644 flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td
create mode 100644 flang/lib/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt
create mode 100644 flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt
new file mode 100644
index 0000000000000..bae7fe3484f4e
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt
@@ -0,0 +1,7 @@
+set(LLVM_TARGET_DEFINITIONS CUFAttr.td)
+mlir_tablegen(CUFEnumAttr.h.inc -gen-enum-decls)
+mlir_tablegen(CUFEnumAttr.cpp.inc -gen-enum-defs)
+mlir_tablegen(CUFAttr.h.inc --gen-attrdef-decls)
+mlir_tablegen(CUFAttr.cpp.inc -gen-attrdef-defs)
+
+add_public_tablegen_target(CUFAttrsIncGen)
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
new file mode 100644
index 0000000000000..f32e39b543e3f
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h
@@ -0,0 +1,106 @@
+//===-- Optimizer/Dialect/CUF/Attributes/CUFAttr.h -- CUF attributes ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
+#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
+
+#include "flang/Common/Fortran.h"
+#include "mlir/IR/BuiltinAttributes.h"
+
+namespace llvm {
+class StringRef;
+}
+
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFEnumAttr.h.inc"
+
+#define GET_ATTRDEF_CLASSES
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h.inc"
+
+namespace cuf {
+
+/// Attribute to mark Fortran entities with the CUDA attribute.
+static constexpr llvm::StringRef getDataAttrName() { return "cuf.data_attr"; }
+static constexpr llvm::StringRef getProcAttrName() { return "cuf.proc_attr"; }
+
+/// Attribute to carry CUDA launch_bounds values.
+static constexpr llvm::StringRef getLaunchBoundsAttrName() {
+ return "cuf.launch_bounds";
+}
+
+/// Attribute to carry CUDA cluster_dims values.
+static constexpr llvm::StringRef getClusterDimsAttrName() {
+ return "cuf.cluster_dims";
+}
+
+inline cuf::DataAttributeAttr
+getDataAttribute(mlir::MLIRContext *mlirContext,
+ std::optional<Fortran::common::CUDADataAttr> cudaAttr) {
+ if (cudaAttr) {
+ cuf::DataAttribute attr;
+ switch (*cudaAttr) {
+ case Fortran::common::CUDADataAttr::Constant:
+ attr = cuf::DataAttribute::Constant;
+ break;
+ case Fortran::common::CUDADataAttr::Device:
+ attr = cuf::DataAttribute::Device;
+ break;
+ case Fortran::common::CUDADataAttr::Managed:
+ attr = cuf::DataAttribute::Managed;
+ break;
+ case Fortran::common::CUDADataAttr::Pinned:
+ attr = cuf::DataAttribute::Pinned;
+ break;
+ case Fortran::common::CUDADataAttr::Shared:
+ attr = cuf::DataAttribute::Shared;
+ break;
+ case Fortran::common::CUDADataAttr::Texture:
+ // Obsolete attribute
+ return {};
+ case Fortran::common::CUDADataAttr::Unified:
+ attr = cuf::DataAttribute::Unified;
+ break;
+ }
+ return cuf::DataAttributeAttr::get(mlirContext, attr);
+ }
+ return {};
+}
+
+inline cuf::ProcAttributeAttr
+getProcAttribute(mlir::MLIRContext *mlirContext,
+ std::optional<Fortran::common::CUDASubprogramAttrs> cudaAttr) {
+ if (cudaAttr) {
+ cuf::ProcAttribute attr;
+ switch (*cudaAttr) {
+ case Fortran::common::CUDASubprogramAttrs::Host:
+ attr = cuf::ProcAttribute::Host;
+ break;
+ case Fortran::common::CUDASubprogramAttrs::Device:
+ attr = cuf::ProcAttribute::Device;
+ break;
+ case Fortran::common::CUDASubprogramAttrs::HostDevice:
+ attr = cuf::ProcAttribute::HostDevice;
+ break;
+ case Fortran::common::CUDASubprogramAttrs::Global:
+ attr = cuf::ProcAttribute::Global;
+ break;
+ case Fortran::common::CUDASubprogramAttrs::Grid_Global:
+ attr = cuf::ProcAttribute::GridGlobal;
+ break;
+ }
+ return cuf::ProcAttributeAttr::get(mlirContext, attr);
+ }
+ return {};
+}
+
+} // namespace cuf
+
+#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFATTR_H
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td
new file mode 100644
index 0000000000000..8e2b546725271
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td
@@ -0,0 +1,100 @@
+//===- CUFAttr.td - CUF Attributes -------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CUF dialect attributes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_DIALECT_CUF_CUFATTRS
+#define FORTRAN_DIALECT_CUF_CUFATTRS
+
+include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
+include "mlir/IR/EnumAttr.td"
+
+class cuf_Attr<string name> : AttrDef<CUFDialect, name>;
+
+def cuf_DataAttribute : I32EnumAttr<
+ "DataAttribute",
+ "CUDA Fortran variable attributes",
+ [
+ I32EnumAttrCase<"Constant", 0, "constant">,
+ I32EnumAttrCase<"Device", 1, "device">,
+ I32EnumAttrCase<"Managed", 2, "managed">,
+ I32EnumAttrCase<"Pinned", 3, "pinned">,
+ I32EnumAttrCase<"Shared", 4, "shared">,
+ I32EnumAttrCase<"Unified", 5, "unified">,
+ // Texture is omitted since it is obsolete and rejected by semantic.
+ ]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::cuf";
+}
+
+def cuf_DataAttributeAttr :
+ EnumAttr<CUFDialect, cuf_DataAttribute, "cuda"> {
+ let assemblyFormat = [{ ```<` $value `>` }];
+}
+
+def cuf_ProcAttribute : I32EnumAttr<
+ "ProcAttribute", "CUDA Fortran procedure attributes",
+ [
+ I32EnumAttrCase<"Host", 0, "host">,
+ I32EnumAttrCase<"Device", 1, "device">,
+ I32EnumAttrCase<"HostDevice", 2, "host_device">,
+ I32EnumAttrCase<"Global", 3, "global">,
+ I32EnumAttrCase<"GridGlobal", 4, "grid_global">,
+ ]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::cuf";
+}
+
+def cuf_ProcAttributeAttr :
+ EnumAttr<CUFDialect, cuf_ProcAttribute, "cuda_proc"> {
+ let assemblyFormat = [{ ```<` $value `>` }];
+}
+
+def cuf_LaunchBoundsAttr : cuf_Attr<"LaunchBounds"> {
+ let mnemonic = "launch_bounds";
+
+ let parameters = (ins
+ "mlir::IntegerAttr":$maxTPB,
+ "mlir::IntegerAttr":$minBPM,
+ OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize
+ );
+
+ let assemblyFormat = "`<` struct(params) `>`";
+}
+
+def cuf_ClusterDimsAttr : cuf_Attr<"ClusterDims"> {
+ let mnemonic = "cluster_dims";
+
+ let parameters = (ins
+ "mlir::IntegerAttr":$x,
+ "mlir::IntegerAttr":$y,
+ "mlir::IntegerAttr":$z
+ );
+
+ let assemblyFormat = "`<` struct(params) `>`";
+}
+
+def cuf_DataTransferKind : I32EnumAttr<
+ "DataTransferKind", "CUDA Fortran data transfer kind",
+ [
+ I32EnumAttrCase<"DeviceHost", 0, "device_host">,
+ I32EnumAttrCase<"HostDevice", 1, "host_device">,
+ I32EnumAttrCase<"DeviceDevice", 2, "device_device">,
+ ]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::cuf";
+}
+
+def cuf_DataTransferKindAttr :
+ EnumAttr<CUFDialect, cuf_DataTransferKind, "cuda_transfer"> {
+ let assemblyFormat = [{ ```<` $value `>` }];
+}
+
+#endif // FORTRAN_DIALECT_CUF_CUFATTRS
diff --git a/flang/lib/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt
new file mode 100644
index 0000000000000..81db40f3ba466
--- /dev/null
+++ b/flang/lib/Optimizer/Dialect/CUF/Attributes/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Keep CUF attributes as a separate library as FIR and HLFIR depend on it.
+add_flang_library(CUFAttrs
+ CUFAttr.cpp
+
+ DEPENDS
+ MLIRIR
+ CUFAttrsIncGen
+
+ LINK_LIBS
+ MLIRTargetLLVMIRExport
+
+ LINK_COMPONENTS
+ AsmParser
+ AsmPrinter
+ Remarks
+)
diff --git a/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp b/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp
new file mode 100644
index 0000000000000..52c733dcad6f0
--- /dev/null
+++ b/flang/lib/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp
@@ -0,0 +1,32 @@
+//===-- CUFAttr.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/DialectImplementation.h"
+#include "mlir/IR/OpDefinition.h"
+#include "llvm/ADT/TypeSwitch.h"
+
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFEnumAttr.cpp.inc"
+#define GET_ATTRDEF_CLASSES
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.cpp.inc"
+
+namespace cuf {
+
+void CUFDialect::registerAttributes() {
+ addAttributes<ClusterDimsAttr, DataAttributeAttr, DataTransferKindAttr,
+ LaunchBoundsAttr, ProcAttributeAttr>();
+}
+
+} // namespace cuf
More information about the flang-commits
mailing list