[flang-commits] [flang] [flang][cuda] Move CUDA Fortran operations to a CUF dialect (PR #92317)

via flang-commits flang-commits at lists.llvm.org
Wed May 15 14:07:44 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-fir-hlfir

@llvm/pr-subscribers-flang-driver

Author: Valentin Clement (バレンタイン クレメン) (clementval)

<details>
<summary>Changes</summary>

The number of operations dedicated to CUF grew and where all still in FIR. In order to have a better organization, the CUF operations, attributes and code is moved into their specific dialect and files. CUF dialect is tightly coupled with HLFIR/FIR and their types.

The CUF attributes are bundled into their own library since some HLFIR/FIR operations depend on them and the CUF dialect depends on the FIR types. Without having the attributes into a separate library there would be a dependency cycle.

---

Patch is 159.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92317.diff


51 Files Affected:

- (modified) flang/include/flang/Lower/ConvertVariable.h (+7-3) 
- (modified) flang/include/flang/Optimizer/Builder/FIRBuilder.h (+2-2) 
- (modified) flang/include/flang/Optimizer/Builder/HLFIRTools.h (+1-1) 
- (modified) flang/include/flang/Optimizer/Dialect/CMakeLists.txt (+2) 
- (added) flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt (+11) 
- (added) flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h (+26) 
- (added) flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td (+43) 
- (added) flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h (+20) 
- (added) flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td (+263) 
- (modified) flang/include/flang/Optimizer/Dialect/FIRAttr.td (-83) 
- (modified) flang/include/flang/Optimizer/Dialect/FIROps.h (+1) 
- (modified) flang/include/flang/Optimizer/Dialect/FIROps.td (+3-239) 
- (modified) flang/include/flang/Optimizer/Dialect/FIROpsSupport.h (-13) 
- (modified) flang/include/flang/Optimizer/HLFIR/HLFIROps.td (+3-2) 
- (modified) flang/include/flang/Optimizer/Support/InitFIR.h (+2-1) 
- (modified) flang/include/flang/Optimizer/Support/Utils.h (+1-60) 
- (modified) flang/lib/Frontend/CMakeLists.txt (+2) 
- (modified) flang/lib/Lower/Allocatable.cpp (+9-8) 
- (modified) flang/lib/Lower/Bridge.cpp (+26-26) 
- (modified) flang/lib/Lower/CMakeLists.txt (+4) 
- (modified) flang/lib/Lower/CallInterface.cpp (+10-11) 
- (modified) flang/lib/Lower/ConvertCall.cpp (+2-1) 
- (modified) flang/lib/Lower/ConvertVariable.cpp (+29-28) 
- (modified) flang/lib/Optimizer/Builder/FIRBuilder.cpp (+4-4) 
- (modified) flang/lib/Optimizer/Builder/HLFIRTools.cpp (+2-2) 
- (modified) flang/lib/Optimizer/Dialect/CMakeLists.txt (+3) 
- (added) flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt (+22) 
- (added) flang/lib/Optimizer/Dialect/CUF/CUFDialect.cpp (+25) 
- (added) flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp (+219) 
- (modified) flang/lib/Optimizer/Dialect/FIRAttr.cpp (+1-3) 
- (modified) flang/lib/Optimizer/Dialect/FIRDialect.cpp (+1) 
- (modified) flang/lib/Optimizer/Dialect/FIROps.cpp (-163) 
- (modified) flang/lib/Optimizer/HLFIR/IR/CMakeLists.txt (+2) 
- (modified) flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp (+2-2) 
- (modified) flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt (+2) 
- (modified) flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp (+4-4) 
- (modified) flang/test/Fir/cuf-invalid.fir (+25-25) 
- (modified) flang/test/Fir/cuf.mlir (+23-23) 
- (modified) flang/test/Lower/CUDA/cuda-allocatable.cuf (+24-24) 
- (modified) flang/test/Lower/CUDA/cuda-data-attribute.cuf (+30-30) 
- (modified) flang/test/Lower/CUDA/cuda-data-transfer.cuf (+25-25) 
- (modified) flang/test/Lower/CUDA/cuda-kernel-calls.cuf (+6-6) 
- (modified) flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf (+5-5) 
- (modified) flang/test/Lower/CUDA/cuda-mod.cuf (+2-2) 
- (modified) flang/test/Lower/CUDA/cuda-module-use.cuf (+4-4) 
- (modified) flang/test/Lower/CUDA/cuda-proc-attribute.cuf (+13-13) 
- (modified) flang/tools/bbc/CMakeLists.txt (+2) 
- (modified) flang/tools/fir-opt/CMakeLists.txt (+2) 
- (modified) flang/tools/tco/CMakeLists.txt (+2) 
- (modified) flang/unittests/Optimizer/CMakeLists.txt (+2) 
- (modified) flang/unittests/Optimizer/FortranVariableTest.cpp (+4-4) 


``````````diff
diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h
index d70d3268acac1..515f4695951b4 100644
--- a/flang/include/flang/Lower/ConvertVariable.h
+++ b/flang/include/flang/Lower/ConvertVariable.h
@@ -23,6 +23,10 @@
 #include "mlir/IR/Value.h"
 #include "llvm/ADT/DenseMap.h"
 
+namespace cuf {
+class DataAttributeAttr;
+}
+
 namespace fir {
 class ExtendedValue;
 class FirOpBuilder;
@@ -146,9 +150,9 @@ translateSymbolAttributes(mlir::MLIRContext *mlirContext,
 
 /// Translate the CUDA Fortran attributes of \p sym into the FIR CUDA attribute
 /// representation.
-fir::CUDADataAttributeAttr
-translateSymbolCUDADataAttribute(mlir::MLIRContext *mlirContext,
-                                 const Fortran::semantics::Symbol &sym);
+cuf::DataAttributeAttr
+translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
+                                const Fortran::semantics::Symbol &sym);
 
 /// Map a symbol to a given fir::ExtendedValue. This will generate an
 /// hlfir.declare when lowering to HLFIR and map the hlfir.declare result to the
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index 0d650f830b64e..287730ef2ac85 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -254,13 +254,13 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener {
                              mlir::StringAttr linkage = {},
                              mlir::Attribute value = {}, bool isConst = false,
                              bool isTarget = false,
-                             fir::CUDADataAttributeAttr cudaAttr = {});
+                             cuf::DataAttributeAttr dataAttr = {});
 
   fir::GlobalOp createGlobal(mlir::Location loc, mlir::Type type,
                              llvm::StringRef name, bool isConst, bool isTarget,
                              std::function<void(FirOpBuilder &)> bodyBuilder,
                              mlir::StringAttr linkage = {},
-                             fir::CUDADataAttributeAttr cudaAttr = {});
+                             cuf::DataAttributeAttr dataAttr = {});
 
   /// Create a global constant (read-only) value.
   fir::GlobalOp createGlobalConstant(mlir::Location loc, mlir::Type type,
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 6cc8e71b3b18d..43aa1661550ec 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -239,7 +239,7 @@ genDeclare(mlir::Location loc, fir::FirOpBuilder &builder,
            const fir::ExtendedValue &exv, llvm::StringRef name,
            fir::FortranVariableFlagsAttr flags,
            mlir::Value dummyScope = nullptr,
-           fir::CUDADataAttributeAttr cudaAttr = {});
+           cuf::DataAttributeAttr dataAttr = {});
 
 /// Generate an hlfir.associate to build a variable from an expression value.
 /// The type of the variable must be provided so that scalar logicals are
diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
index f00993d4d3778..301a93c1fe5b4 100644
--- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory(CUF)
+
 # This replicates part of the add_mlir_dialect cmake function from MLIR that
 # cannot be used her because it expects to be run inside MLIR directory which
 # is not the case for FIR.
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt
new file mode 100644
index 0000000000000..07490c7b9ca2c
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_subdirectory(Attributes)
+
+set(LLVM_TARGET_DEFINITIONS CUFDialect.td)
+mlir_tablegen(CUFDialect.h.inc -gen-dialect-decls -dialect=cuf)
+mlir_tablegen(CUFDialect.cpp.inc -gen-dialect-defs -dialect=cuf)
+
+set(LLVM_TARGET_DEFINITIONS CUFOps.td)
+mlir_tablegen(CUFOps.h.inc -gen-op-decls)
+mlir_tablegen(CUFOps.cpp.inc -gen-op-defs)
+
+add_public_tablegen_target(CUFOpsIncGen)
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h
new file mode 100644
index 0000000000000..cf562b2268355
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.h
@@ -0,0 +1,26 @@
+//===-- Optimizer/Dialect/CUFDialect.h -- CUF dialect -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H
+#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H
+
+#include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/SymbolTable.h"
+#include "mlir/Interfaces/CallInterfaces.h"
+#include "mlir/Interfaces/FunctionInterfaces.h"
+#include "mlir/Interfaces/LoopLikeInterface.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.h.inc"
+
+#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFDIALECT_H
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td
new file mode 100644
index 0000000000000..df866e5664068
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFDialect.td
@@ -0,0 +1,43 @@
+//===-- CUFDialect.td - CUF dialect base definitions -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of the CUDA Fortran dialect
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_DIALECT_CUF_CUFDIALECT
+#define FORTRAN_DIALECT_CUF_CUFDIALECT
+
+include "mlir/IR/AttrTypeBase.td"
+include "mlir/IR/EnumAttr.td"
+include "mlir/IR/OpBase.td"
+
+def CUFDialect : Dialect {
+  let name = "cuf";
+
+  let summary = "CUDA Fortran dialect";
+
+  let description = [{
+    This dialect models CUDA Fortran operations. The CUF dialect operations use
+    the FIR types and are tightly coupled with FIR and HLFIR.
+  }];
+
+  let useDefaultAttributePrinterParser = 1;
+  let usePropertiesForAttributes = 1;
+  let cppNamespace = "::cuf";
+  let dependentDialects = ["fir::FIROpsDialect"];
+
+  let extraClassDeclaration = [{
+  private:
+    // Register the CUF Attributes.
+    void registerAttributes();
+  }];
+}
+
+#endif // FORTRAN_DIALECT_CUF_CUFDIALECT
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h
new file mode 100644
index 0000000000000..4132db672e394
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h
@@ -0,0 +1,20 @@
+//===-- Optimizer/Dialect/CUF/CUFOps.h - CUF operations ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H
+#define FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H
+
+#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
+#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "mlir/IR/OpDefinition.h"
+
+#define GET_OP_CLASSES
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h.inc"
+
+#endif // FORTRAN_OPTIMIZER_DIALECT_CUF_CUFOPS_H
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
new file mode 100644
index 0000000000000..6ec2693077282
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -0,0 +1,263 @@
+//===-- CUFOps.td - CUF operation definitions --------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of the CUF dialect operations
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_DIALECT_CUF_CUF_OPS
+#define FORTRAN_DIALECT_CUF_CUF_OPS
+
+include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
+include "flang/Optimizer/Dialect/CUF/CUFAttr.td"
+include "flang/Optimizer/Dialect/FIRTypes.td"
+include "mlir/Interfaces/LoopLikeInterface.td"
+include "mlir/IR/BuiltinAttributes.td"
+
+class cuf_Op<string mnemonic, list<Trait> traits>
+    : Op<CUFDialect, mnemonic, traits>;
+
+def cuf_AllocOp : cuf_Op<"alloc", [AttrSizedOperandSegments,
+    MemoryEffects<[MemAlloc]>]> {
+  let summary = "Allocate an object on device";
+
+  let description = [{
+    This is a drop in replacement for fir.alloca and fir.allocmem for device
+    object. Any device, managed or unified object declared in an host
+    subprogram needs to be allocated in the device memory through runtime calls.
+    The cuf.alloc is an abstraction to the runtime calls and works together
+    with cuf.free.
+  }];
+
+  let arguments = (ins
+    TypeAttr:$in_type,
+    OptionalAttr<StrAttr>:$uniq_name,
+    OptionalAttr<StrAttr>:$bindc_name,
+    Variadic<AnyIntegerType>:$typeparams,
+    Variadic<AnyIntegerType>:$shape,
+    cuf_DataAttributeAttr:$data_attr
+  );
+
+  let results = (outs fir_ReferenceType:$ptr);
+
+  let assemblyFormat = [{
+    $in_type (`(` $typeparams^ `:` type($typeparams) `)`)?
+        (`,` $shape^ `:` type($shape) )?  attr-dict `->` qualified(type($ptr))
+  }];
+
+  let builders = [
+    OpBuilder<(ins "mlir::Type":$inType, "llvm::StringRef":$uniqName,
+      "llvm::StringRef":$bindcName,
+      "cuf::DataAttributeAttr":$cudaAttr,
+      CArg<"mlir::ValueRange", "{}">:$typeparams,
+      CArg<"mlir::ValueRange", "{}">:$shape,
+      CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
+
+  let hasVerifier = 1;
+}
+
+def cuf_FreeOp : cuf_Op<"free", [MemoryEffects<[MemFree]>]> {
+  let summary = "Free a device allocated object";
+
+  let description = [{
+    The cuf.free operation frees the memory allocated by cuf.alloc.
+    This is used for non-allocatable device, managed and unified device
+    variables declare in host subprogram.
+  }];
+
+  let arguments = (ins
+    Arg<AnyReferenceLike, "", [MemFree]>:$devptr,
+    cuf_DataAttributeAttr:$data_attr
+  );
+
+  let assemblyFormat = "$devptr `:` qualified(type($devptr)) attr-dict";
+
+  let hasVerifier = 1;
+}
+
+def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments,
+    MemoryEffects<[MemAlloc<DefaultResource>]>]> {
+  let summary = "Perform the device allocation of data of an allocatable";
+
+  let description = [{
+    The cuf.allocate operation performs the allocation on the device
+    of the data of an allocatable. The descriptor passed to the operation
+    is initialized before with the standard flang runtime calls.
+  }];
+
+  let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
+                       Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
+                       Optional<AnyIntegerType>:$stream,
+                       Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
+                       Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
+                       cuf_DataAttributeAttr:$data_attr,
+                       UnitAttr:$hasStat);
+
+  let results = (outs AnyIntegerType:$stat);
+
+  let assemblyFormat = [{
+    $box `:` qualified(type($box))
+    ( `source` `(` $source^ `:` qualified(type($source) )`)` )?
+    ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
+    ( `stream` `(` $stream^ `:` type($stream) `)` )?
+    ( `pinned` `(` $pinned^ `:` type($pinned) `)` )?
+    attr-dict `->` type($stat)
+  }];
+
+  let hasVerifier = 1;
+}
+
+def cuf_DeallocateOp : cuf_Op<"deallocate",
+    [MemoryEffects<[MemFree<DefaultResource>]>]> {
+  let summary = "Perform the device deallocation of data of an allocatable";
+
+  let description = [{
+    The cuf.deallocate operation performs the deallocation on the device
+    of the data of an allocatable.
+  }];
+
+  let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
+                       Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
+                       cuf_DataAttributeAttr:$data_attr,
+                       UnitAttr:$hasStat);
+
+  let results = (outs AnyIntegerType:$stat);
+
+  let assemblyFormat = [{
+    $box `:` qualified(type($box))
+    ( `errmsg` `(` $errmsg^ `:` type($errmsg) `)` )?
+    attr-dict `->` type($stat)
+  }];
+
+  let hasVerifier = 1;
+}
+
+def cuf_DataTransferOp : cuf_Op<"data_transfer", []> {
+  let summary = "Represent a data transfer between host and device memory";
+
+  let description = [{
+    CUDA Fortran allows data transfer to be done via intrinsic assignment
+    between a host and a device variable. This operation is used to materialized
+    the data transfer between the lhs and rhs memory references.
+    The kind of transfer is specified in the attribute. 
+
+    ```
+      adev = a ! transfer host to device
+      a = adev ! transfer device to host
+      bdev = adev ! transfer device to device
+    ```
+  }];
+
+  let arguments = (ins Arg<AnyReferenceLike, "", [MemWrite]>:$src,
+                       Arg<AnyReferenceLike, "", [MemRead]>:$dst,
+                       cuf_DataTransferKindAttr:$transfer_kind);
+
+  let assemblyFormat = [{
+    $src `to` $dst attr-dict `:` type(operands)
+  }];
+}
+
+def cuf_KernelLaunchOp : cuf_Op<"kernel_launch", [CallOpInterface,
+    AttrSizedOperandSegments]> {
+  let summary = "call CUDA kernel";
+
+  let description = [{
+    Launch a CUDA kernel from the host.
+
+    ```
+      // launch simple kernel with no arguments. bytes and stream value are
+      // optional in the chevron notation.
+      cuf.kernel_launch @kernel<<<%gx, %gy, %bx, %by, %bz>>>()
+    ```
+  }];
+
+  let arguments = (ins
+    SymbolRefAttr:$callee,
+    I32:$grid_x,
+    I32:$grid_y,
+    I32:$grid_z,
+    I32:$block_x,
+    I32:$block_y,
+    I32:$block_z,
+    Optional<I32>:$bytes,
+    Optional<I32>:$stream,
+    Variadic<AnyType>:$args
+  );
+
+  let assemblyFormat = [{
+    $callee `<` `<` `<` $grid_x `,` $grid_y `,` $grid_z `,`$block_x `,`
+        $block_y `,` $block_z ( `,` $bytes^ ( `,` $stream^ )? )? `>` `>` `>`
+        `` `(` $args `)` ( `:` `(` type($args)^ `)` )? attr-dict
+  }];
+
+  let extraClassDeclaration = [{
+    mlir::CallInterfaceCallable getCallableForCallee() {
+      return getCalleeAttr();
+    }
+
+    void setCalleeFromCallable(mlir::CallInterfaceCallable callee) {
+      (*this)->setAttr(getCalleeAttrName(), callee.get<mlir::SymbolRefAttr>());
+    }
+    mlir::FunctionType getFunctionType();
+
+    unsigned getNbNoArgOperand() {
+      unsigned nbNoArgOperand = 5; // grids and blocks values are always present.
+      if (getBytes()) ++nbNoArgOperand;
+      if (getStream()) ++nbNoArgOperand;
+      return nbNoArgOperand;
+    }
+
+    operand_range getArgOperands() {
+      return {operand_begin() + getNbNoArgOperand(), operand_end()};
+    }
+    mlir::MutableOperandRange getArgOperandsMutable() {
+      return mlir::MutableOperandRange(
+          *this, getNbNoArgOperand(), getArgs().size() - 1);
+    }
+  }];
+}
+
+def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
+    DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
+
+  let description = [{
+    Represent the CUDA Fortran kernel directive. The operation is a loop like
+    operation that represents the iteration range of the embedded loop nest.
+
+    When grid or block variadic operands are empty, a `*` only syntax was used
+    in the Fortran code.
+    If the `*` is mixed with values for either grid or block, these are
+    represented by a 0 constant value.
+  }];
+
+  let arguments = (ins
+    Variadic<I32>:$grid, // empty means `*`
+    Variadic<I32>:$block, // empty means `*`
+    Optional<I32>:$stream,
+    Variadic<Index>:$lowerbound,
+    Variadic<Index>:$upperbound,
+    Variadic<Index>:$step,
+    OptionalAttr<I64Attr>:$n
+  );
+
+  let regions = (region AnyRegion:$region);
+
+  let assemblyFormat = [{
+    `<` `<` `<` custom<CUFKernelValues>($grid, type($grid)) `,` 
+                custom<CUFKernelValues>($block, type($block))
+        ( `,` `stream` `=` $stream^ )? `>` `>` `>`
+        custom<CUFKernelLoopControl>($region, $lowerbound, type($lowerbound),
+            $upperbound, type($upperbound), $step, type($step))
+        attr-dict
+  }];
+
+  let hasVerifier = 1;
+}
+
+#endif // FORTRAN_DIALECT_CUF_CUF_OPS
diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
index f8b3fb861cc62..989319ff3ddaf 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
@@ -70,87 +70,4 @@ def fir_BoxFieldAttr : I32EnumAttr<
 // mlir::SideEffects::Resource for modelling operations which add debugging information
 def DebuggingResource : Resource<"::fir::DebuggingResource">;
 
-//===----------------------------------------------------------------------===//
-// CUDA Fortran specific attributes
-//===----------------------------------------------------------------------===//
-
-def fir_CUDADataAttribute : I32EnumAttr<
-    "CUDADataAttribute",
-    "CUDA Fortran variable attributes",
-    [
-      I32EnumAttrCase<"Constant", 0, "constant">,
-      I32EnumAttrCase<"Device", 1, "device">,
-      I32EnumAttrCase<"Managed", 2, "managed">,
-      I32EnumAttrCase<"Pinned", 3, "pinned">,
-      I32EnumAttrCase<"Shared", 4, "shared">,
-      I32EnumAttrCase<"Unified", 5, "unified">,
-      // Texture is omitted since it is obsolete and rejected by semantic.
-    ]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::fir";
-}
-
-def fir_CUDADataAttributeAttr :
-    EnumAttr<FIROpsDialect, fir_CUDADataAttribute, "cuda"> {
-  let assemblyFormat = [{ ```<` $value `>` }];
-}
-
-def fir_CUDAProcAttribute : I32EnumAttr<
-    "CUDAProcAttribute", "CUDA Fortran procedure attributes",
-    [
-      I32EnumAttrCase<"Host", 0, "host">,
-      I32EnumAttrCase<"Device", 1, "device">,
-      I32EnumAttrCase<"HostDevice", 2, "host_device">,
-      I32EnumAttrCase<"Global", 3, "global">,
-      I32EnumAttrCase<"GridGlobal", 4, "grid_global">,
-    ]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::fir";
-}
-
-def fir_CUDAProcAttributeAttr :
-    EnumAttr<FIROpsDialect, fir_CUDAProcAttribute, "cuda_proc"> {
-  let assemblyFormat = [{ ```<` $value `>` }];
-}
-
-def fir_CUDALaunchBoundsAttr : fir_Attr<"CUDALaunchBounds"> {
-  let mnemonic = "launch_bounds";
-
-  let parameters = (ins
-    "mlir::IntegerAttr":$maxTPB,
-    "mlir::IntegerAttr":$minBPM,
-    OptionalParameter<"mlir::IntegerAttr">:$upperBoundClusterSize
-  );
-
-  let assemblyFormat = "`<` struct(params) `>`";
-}
-
-def fir_CUDAClusterDimsAttr : fir_Attr<"CUDAClusterDims"> {
-  let mnemonic = "cluster_dims";
-
-  let parameters = (ins
-    "mlir::IntegerAttr":$x,
-    "mlir::IntegerAttr":$y,
-    "mlir::IntegerAttr":$z
-  );
-
-  let assemblyFormat = "`<` struct(params) `>`";
-}
-
-def fir_CUDADataTransferKind : I32EnumAttr<
-    "CUDADataTransferKind", "CUDA Fortran data transfer kind",
-    [
-      I32EnumAttrCase<"DeviceHost", 0, "device_host">,
-      I32EnumAttrCase<"HostDevice", 1, "host_device">,
-      I32EnumAttrCase<"DeviceDevice", 2, "device_device">,
-    ]> {
-  let genSpecializedAttr = 0;
-  let cppNamesp...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/92317


More information about the flang-commits mailing list