[flang-commits] [flang] 4818623 - [flang][cuda] Add cuf.shared_memory operation (#131392)

via flang-commits flang-commits at lists.llvm.org
Fri Mar 14 15:43:29 PDT 2025


Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-03-14T15:43:25-07:00
New Revision: 4818623924a6846d17196a149b633b6bc3316b83

URL: https://github.com/llvm/llvm-project/commit/4818623924a6846d17196a149b633b6bc3316b83
DIFF: https://github.com/llvm/llvm-project/commit/4818623924a6846d17196a149b633b6bc3316b83.diff

LOG: [flang][cuda] Add cuf.shared_memory operation (#131392)

Introduce `cuf.shared_memory` operation. The operation is used to get
the pointer in shared memory for a specific variable. The shared memory
is materialized as a global in address space 3, and the different
variables point into it at different offsets.

Follow-up patches will add lowering and conversion of this operation.

Added: 
    

Modified: 
    flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
    flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
    flang/test/Fir/cuf.mlir

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index c1021da0cfb21..eda129fb59ded 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -349,4 +349,33 @@ def cuf_DeviceAddressOp : cuf_Op<"device_address", []> {
   let results = (outs fir_ReferenceType:$addr);
 }
 
+def cuf_SharedMemoryOp
+    : cuf_Op<"shared_memory", [AttrSizedOperandSegments, Pure]> {
+  let summary = "Get the pointer to the kernel shared memory";
+
+  let description = [{
+    Return the pointer in the shared memory relative to the specified offset.
+  }];
+
+  let arguments = (ins TypeAttr:$in_type, OptionalAttr<StrAttr>:$uniq_name,
+      OptionalAttr<StrAttr>:$bindc_name, Variadic<AnyIntegerType>:$typeparams,
+      Variadic<AnyIntegerType>:$shape,
+      OptionalAttr<I32Attr>:$offset // offset in bytes from the shared memory
+                                    // base address.
+  );
+
+  let results = (outs fir_ReferenceType:$ptr);
+
+  let assemblyFormat = [{
+      $in_type (`(` $typeparams^ `:` type($typeparams) `)`)?
+        (`,` $shape^ `:` type($shape) )?  attr-dict `->` qualified(type($ptr))
+  }];
+
+  let builders = [OpBuilder<(ins "mlir::Type":$inType,
+      "llvm::StringRef":$uniqName, "llvm::StringRef":$bindcName,
+      CArg<"mlir::ValueRange", "{}">:$typeparams,
+      CArg<"mlir::ValueRange", "{}">:$shape,
+      CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
+}
+
 #endif // FORTRAN_DIALECT_CUF_CUF_OPS

diff  --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index fa82f3916a57e..3c7af9fc8a7d8 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -300,6 +300,25 @@ mlir::LogicalResult cuf::RegisterKernelOp::verify() {
   return emitOpError("device function not found");
 }
 
+//===----------------------------------------------------------------------===//
+// SharedMemoryOp
+//===----------------------------------------------------------------------===//
+
+void cuf::SharedMemoryOp::build(
+    mlir::OpBuilder &builder, mlir::OperationState &result, mlir::Type inType,
+    llvm::StringRef uniqName, llvm::StringRef bindcName,
+    mlir::ValueRange typeparams, mlir::ValueRange shape,
+    llvm::ArrayRef<mlir::NamedAttribute> attributes) {
+  mlir::StringAttr nameAttr =
+      uniqName.empty() ? mlir::StringAttr{} : builder.getStringAttr(uniqName);
+  mlir::StringAttr bindcAttr =
+      bindcName.empty() ? mlir::StringAttr{} : builder.getStringAttr(bindcName);
+  build(builder, result, wrapAllocaResultType(inType),
+        mlir::TypeAttr::get(inType), nameAttr, bindcAttr, typeparams, shape,
+        mlir::IntegerAttr{});
+  result.addAttributes(attributes);
+}
+
 // Tablegen operators
 
 #define GET_OP_CLASSES

diff  --git a/flang/test/Fir/cuf.mlir b/flang/test/Fir/cuf.mlir
index 188044d04b848..d38b26a4548ed 100644
--- a/flang/test/Fir/cuf.mlir
+++ b/flang/test/Fir/cuf.mlir
@@ -86,3 +86,30 @@ func.func @_QPsub1() {
 // CHECK: cuf.alloc
 // CHECK: cuf.free
 
+// -----
+
+ gpu.module @cuda_device_mod {
+  gpu.func @_QPdynshared() kernel {
+    %c-1 = arith.constant -1 : index
+    %6 = cuf.shared_memory !fir.array<?xf32>, %c-1 : index {bindc_name = "r", uniq_name = "_QFdynsharedEr"} -> !fir.ref<!fir.array<?xf32>>
+    %7 = fir.shape %c-1 : (index) -> !fir.shape<1>
+    %8 = fir.declare %6(%7) {data_attr = #cuf.cuda<shared>, uniq_name = "_QFdynsharedEr"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<?xf32>>
+    gpu.return
+  }
+}
+
+// CHECK: cuf.shared_memory
+
+// -----
+
+gpu.module @cuda_device_mod {
+  gpu.func @_QPshared_static() attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+    %0 = cuf.shared_memory i32 {bindc_name = "a", uniq_name = "_QFshared_staticEa"} -> !fir.ref<i32>
+    %1 = fir.declare %0 {data_attr = #cuf.cuda<shared>, uniq_name = "_QFshared_staticEa"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %2 = cuf.shared_memory i32 {bindc_name = "b", uniq_name = "_QFshared_staticEb"} -> !fir.ref<i32>
+    %3 = fir.declare %2 {data_attr = #cuf.cuda<shared>, uniq_name = "_QFshared_staticEb"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    gpu.return
+  }
+}
+
+// CHECK-COUNT-2: cuf.shared_memory 


        


More information about the flang-commits mailing list