[flang-commits] [flang] [flang][cuda] Add cuf.shared_memory operation (PR #131392)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Fri Mar 14 14:12:59 PDT 2025


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/131392

>From ded7683de5ec0df0358953dd8d93b12626212eea Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 14 Mar 2025 14:07:22 -0700
Subject: [PATCH 1/2] [flang][cuda] Add cuf.shared_memory operation

---
 .../flang/Optimizer/Dialect/CUF/CUFOps.td     | 29 +++++++++++++++++++
 flang/test/Fir/cuf.mlir                       | 27 +++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index c1021da0cfb21..eda129fb59ded 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -349,4 +349,33 @@ def cuf_DeviceAddressOp : cuf_Op<"device_address", []> {
   let results = (outs fir_ReferenceType:$addr);
 }
 
+def cuf_SharedMemoryOp // Op returning a reference into kernel shared memory.
+    : cuf_Op<"shared_memory", [AttrSizedOperandSegments, Pure]> {
+  let summary = "Get the pointer to the kernel shared memory";
+
+  let description = [{
+    Return the pointer in the shared memory relative to the specified offset.
+  }];
+
+  let arguments = (ins TypeAttr:$in_type, OptionalAttr<StrAttr>:$uniq_name,
+      OptionalAttr<StrAttr>:$bindc_name, Variadic<AnyIntegerType>:$typeparams,
+      Variadic<AnyIntegerType>:$shape, // second variadic operand group
+      OptionalAttr<I32Attr>:$offset // offset in bytes from the shared memory
+                                    // base address.
+  );
+
+  let results = (outs fir_ReferenceType:$ptr); // single reference-typed result
+
+  let assemblyFormat = [{
+      $in_type (`(` $typeparams^ `:` type($typeparams) `)`)?
+        (`,` $shape^ `:` type($shape) )?  attr-dict `->` qualified(type($ptr))
+  }];
+
+  let builders = [OpBuilder<(ins "mlir::Type":$inType, // body defined in CUFOps.cpp
+      "llvm::StringRef":$uniqName, "llvm::StringRef":$bindcName,
+      CArg<"mlir::ValueRange", "{}">:$typeparams,
+      CArg<"mlir::ValueRange", "{}">:$shape,
+      CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
+}
+
 #endif // FORTRAN_DIALECT_CUF_CUF_OPS
diff --git a/flang/test/Fir/cuf.mlir b/flang/test/Fir/cuf.mlir
index 188044d04b848..d38b26a4548ed 100644
--- a/flang/test/Fir/cuf.mlir
+++ b/flang/test/Fir/cuf.mlir
@@ -86,3 +86,30 @@ func.func @_QPsub1() {
 // CHECK: cuf.alloc
 // CHECK: cuf.free
 
+// -----
+
+ gpu.module @cuda_device_mod {
+  gpu.func @_QPdynshared() kernel { // array case: !fir.array<?xf32> plus a shape operand
+    %c-1 = arith.constant -1 : index
+    %6 = cuf.shared_memory !fir.array<?xf32>, %c-1 : index {bindc_name = "r", uniq_name = "_QFdynsharedEr"} -> !fir.ref<!fir.array<?xf32>>
+    %7 = fir.shape %c-1 : (index) -> !fir.shape<1>
+    %8 = fir.declare %6(%7) {data_attr = #cuf.cuda<shared>, uniq_name = "_QFdynsharedEr"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<?xf32>>
+    gpu.return
+  }
+}
+
+// CHECK: cuf.shared_memory
+
+// -----
+
+gpu.module @cuda_device_mod {
+  gpu.func @_QPshared_static() attributes {cuf.proc_attr = #cuf.cuda_proc<global>} { // scalar case: two i32 variables, no typeparams or shape operands
+    %0 = cuf.shared_memory i32 {bindc_name = "a", uniq_name = "_QFshared_staticEa"} -> !fir.ref<i32>
+    %1 = fir.declare %0 {data_attr = #cuf.cuda<shared>, uniq_name = "_QFshared_staticEa"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %2 = cuf.shared_memory i32 {bindc_name = "b", uniq_name = "_QFshared_staticEb"} -> !fir.ref<i32>
+    %3 = fir.declare %2 {data_attr = #cuf.cuda<shared>, uniq_name = "_QFshared_staticEb"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    gpu.return
+  }
+}
+
+// CHECK-COUNT-2: cuf.shared_memory 

>From aada4cf25bdb959c9ee45a2b5c3aedddd58fb0b0 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 14 Mar 2025 14:12:42 -0700
Subject: [PATCH 2/2] Add missing builder

---
 .../flang/Optimizer/Transforms/Passes.td      | 13 +++++++++++++
 flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp    | 19 +++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index e5c17cf7d8881..fbab435887b8a 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -453,6 +453,19 @@ def CUFGPUToLLVMConversion : Pass<"cuf-gpu-convert-to-llvm", "mlir::ModuleOp"> {
   ];
 }
 
+def CUFComputeSharedMemoryOffsetsAndSize // Pass declaration only.
+    : Pass<"cuf-compute-shared-memory", "mlir::ModuleOp"> { // anchored on mlir::ModuleOp
+  let summary = "Create the shared memory global variable and set offsets";
+
+  let description = [{
+    Compute the size and alignment of the shared memory global and materialize
+    it. Compute the offset of each cuf.shared_memory operation according to
+    the global and set it.
+  }];
+
+  let dependentDialects = ["fir::FIROpsDialect"]; // NOTE(review): no pass implementation is visible in this patch; confirm it lands separately.
+}
+
 def SetRuntimeCallAttributes
     : Pass<"set-runtime-call-attrs", "mlir::func::FuncOp"> {
   let summary = "Set Fortran runtime fir.call attributes targeting LLVM IR";
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index fa82f3916a57e..3c7af9fc8a7d8 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -300,6 +300,25 @@ mlir::LogicalResult cuf::RegisterKernelOp::verify() {
   return emitOpError("device function not found");
 }
 
+//===----------------------------------------------------------------------===//
+// SharedMemoryOp
+//===----------------------------------------------------------------------===//
+
+void cuf::SharedMemoryOp::build( // builder taking the names as plain strings
+    mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Type inType,
+    llvm::StringRef uniqName, llvm::StringRef bindcName,
+    mlir::ValueRange typeparams, mlir::ValueRange shape,
+    llvm::ArrayRef<mlir::NamedAttribute> attributes) {
+  mlir::StringAttr nameAttr = // null attribute when uniqName is empty
+      uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName);
+  mlir::StringAttr bindcAttr = // null attribute when bindcName is empty
+      bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName);
+  build(builder, result, wrapAllocaResultType(inType), // result type comes from the helper
+        mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape,
+        mlir::IntegerAttr{}); // null attr; presumably the offset slot (last ODS argument)
+  result.addAttributes(attributes); // then append the caller-supplied attributes
+}
+
 // Tablegen operators
 
 #define GET_OP_CLASSES



More information about the flang-commits mailing list