[flang-commits] [flang] 4fde8c3 - [flang][cuda] Lower CUDA shared variable with cuf.shared_memory op (#131399)

via flang-commits flang-commits at lists.llvm.org
Sun Mar 16 17:45:00 PDT 2025


Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-03-16T17:44:56-07:00
New Revision: 4fde8c341f9166e6ec6dff6e7704be175e382f5b

URL: https://github.com/llvm/llvm-project/commit/4fde8c341f9166e6ec6dff6e7704be175e382f5b
DIFF: https://github.com/llvm/llvm-project/commit/4fde8c341f9166e6ec6dff6e7704be175e382f5b.diff

LOG: [flang][cuda] Lower CUDA shared variable with cuf.shared_memory op (#131399)

Use the `cuf.shared_memory` operation instead of `cuf.alloc` for CUDA shared
variables. These variables do not need free operations.

Added: 
    flang/test/Lower/CUDA/cuda-shared.cuf

Modified: 
    flang/lib/Lower/ConvertVariable.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index ab5e6346f8d54..05256fec67241 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -738,9 +738,11 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
     auto idxTy = builder.getIndexType();
     for (mlir::Value sh : elidedShape)
       indices.push_back(builder.createConvert(loc, idxTy, sh));
-    mlir::Value alloc = builder.create<cuf::AllocOp>(
-        loc, ty, nm, symNm, dataAttr, lenParams, indices);
-    return alloc;
+    if (dataAttr.getValue() == cuf::DataAttribute::Shared)
+      return builder.create<cuf::SharedMemoryOp>(loc, ty, nm, symNm, lenParams,
+                                                 indices);
+    return builder.create<cuf::AllocOp>(loc, ty, nm, symNm, dataAttr, lenParams,
+                                        indices);
   }
 
   // Let the builder do all the heavy lifting.
@@ -1032,12 +1034,16 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
                                                symMap);
   if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol())) {
     auto *builder = &converter.getFirOpBuilder();
+    cuf::DataAttributeAttr dataAttr =
+        Fortran::lower::translateSymbolCUFDataAttribute(builder->getContext(),
+                                                        var.getSymbol());
     mlir::Location loc = converter.getCurrentLocation();
     fir::ExtendedValue exv =
         converter.getSymbolExtendedValue(var.getSymbol(), &symMap);
     auto *sym = &var.getSymbol();
     const Fortran::semantics::Scope &owner = sym->owner();
-    if (owner.kind() != Fortran::semantics::Scope::Kind::MainProgram) {
+    if (owner.kind() != Fortran::semantics::Scope::Kind::MainProgram &&
+        dataAttr.getValue() != cuf::DataAttribute::Shared) {
       converter.getFctCtx().attachCleanup([builder, loc, exv, sym]() {
         cuf::DataAttributeAttr dataAttr =
             Fortran::lower::translateSymbolCUFDataAttribute(

diff  --git a/flang/test/Lower/CUDA/cuda-shared.cuf b/flang/test/Lower/CUDA/cuda-shared.cuf
new file mode 100644
index 0000000000000..f41011df06ae7
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-shared.cuf
@@ -0,0 +1,12 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+attributes(global) subroutine sharedmem()
+  real, shared :: s(32)
+  integer :: t
+  t = threadIdx%x
+  s(t) = t
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsharedmem() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK: %{{.*}} = cuf.shared_memory !fir.array<32xf32> {bindc_name = "s", uniq_name = "_QFsharedmemEs"} -> !fir.ref<!fir.array<32xf32>>
+! CHECK-NOT: cuf.free


        


More information about the flang-commits mailing list