[flang-commits] [flang] [flang][cuda] Add operation to sync global descriptor (PR #121520)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Thu Jan 2 12:19:33 PST 2025


https://github.com/clementval created https://github.com/llvm/llvm-project/pull/121520

Introduce the `cuf.sync_descriptor` operation, which is used to synchronize the descriptor of a device global after pointer association.

>From f37e52237791f58438790c77edeb8de08f692987 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 2 Jan 2025 12:17:17 -0800
Subject: [PATCH] [flang][cuda] Add operation to sync global descriptor

---
 .../{Transforms => Builder}/CUFCommon.h       |  0
 .../flang/Optimizer/Dialect/CUF/CUFOps.td     | 11 +++++++++++
 flang/lib/Lower/Allocatable.cpp               | 19 +++++++++++++++++++
 flang/lib/Optimizer/Builder/CMakeLists.txt    |  1 +
 .../{Transforms => Builder}/CUFCommon.cpp     |  2 +-
 flang/lib/Optimizer/Transforms/CMakeLists.txt |  1 -
 .../Transforms/CUFAddConstructor.cpp          |  2 +-
 .../Optimizer/Transforms/CUFDeviceGlobal.cpp  |  2 +-
 .../Optimizer/Transforms/CUFOpConversion.cpp  |  2 +-
 .../Transforms/SimplifyIntrinsics.cpp         |  2 +-
 flang/test/Lower/CUDA/cuda-pointer-sync.cuf   | 17 +++++++++++++++++
 11 files changed, 53 insertions(+), 6 deletions(-)
 rename flang/include/flang/Optimizer/{Transforms => Builder}/CUFCommon.h (100%)
 rename flang/lib/Optimizer/{Transforms => Builder}/CUFCommon.cpp (97%)
 create mode 100644 flang/test/Lower/CUDA/cuda-pointer-sync.cuf

diff --git a/flang/include/flang/Optimizer/Transforms/CUFCommon.h b/flang/include/flang/Optimizer/Builder/CUFCommon.h
similarity index 100%
rename from flang/include/flang/Optimizer/Transforms/CUFCommon.h
rename to flang/include/flang/Optimizer/Builder/CUFCommon.h
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index d06587c57d44b6..9a31ffa2e94712 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -140,6 +140,17 @@ def cuf_DeallocateOp : cuf_Op<"deallocate",
   let hasVerifier = 1;
 }
 
+def cuf_SyncDescriptorOp : cuf_Op<"sync_descriptor", []> {
+  let summary =
+      "Synchronize the host and device descriptor of a Fortran pointer";
+
+  let arguments = (ins SymbolRefAttr:$globalName);
+
+  let assemblyFormat = [{
+    $globalName attr-dict
+  }];
+}
+
 def cuf_DataTransferOp : cuf_Op<"data_transfer", []> {
   let summary = "Represent a data transfer between host and device memory";
 
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index fb8380ac7e8c51..4c64870675816e 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -22,12 +22,14 @@
 #include "flang/Lower/PFTBuilder.h"
 #include "flang/Lower/Runtime.h"
 #include "flang/Lower/StatementContext.h"
+#include "flang/Optimizer/Builder/CUFCommon.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
 #include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/FatalError.h"
 #include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Parser/parse-tree.h"
@@ -1086,6 +1088,22 @@ bool Fortran::lower::isArraySectionWithoutVectorSubscript(
          !Fortran::evaluate::HasVectorSubscript(expr);
 }
 
+static void genCUFPointerSync(const mlir::Value box,
+                              fir::FirOpBuilder &builder) {
+  if (auto declareOp = box.getDefiningOp<hlfir::DeclareOp>()) {
+    if (auto addrOfOp = declareOp.getMemref().getDefiningOp<fir::AddrOfOp>()) {
+      auto mod = addrOfOp->getParentOfType<mlir::ModuleOp>();
+      if (auto globalOp =
+              mod.lookupSymbol<fir::GlobalOp>(addrOfOp.getSymbol())) {
+        if (cuf::isRegisteredDeviceGlobal(globalOp)) {
+          builder.create<cuf::SyncDescriptorOp>(box.getLoc(),
+                                                addrOfOp.getSymbol());
+        }
+      }
+    }
+  }
+}
+
 void Fortran::lower::associateMutableBox(
     Fortran::lower::AbstractConverter &converter, mlir::Location loc,
     const fir::MutableBoxValue &box, const Fortran::lower::SomeExpr &source,
@@ -1098,6 +1116,7 @@ void Fortran::lower::associateMutableBox(
   if (converter.getLoweringOptions().getLowerToHighLevelFIR()) {
     fir::ExtendedValue rhs = converter.genExprAddr(loc, source, stmtCtx);
     fir::factory::associateMutableBox(builder, loc, box, rhs, lbounds);
+    genCUFPointerSync(box.getAddr(), builder);
     return;
   }
   // The right hand side is not be evaluated into a temp. Array sections can
diff --git a/flang/lib/Optimizer/Builder/CMakeLists.txt b/flang/lib/Optimizer/Builder/CMakeLists.txt
index 05164d41a4cb55..a824d70fdb5c74 100644
--- a/flang/lib/Optimizer/Builder/CMakeLists.txt
+++ b/flang/lib/Optimizer/Builder/CMakeLists.txt
@@ -5,6 +5,7 @@ add_flang_library(FIRBuilder
   BoxValue.cpp
   Character.cpp
   Complex.cpp
+  CUFCommon.cpp
   DoLoopHelper.cpp
   FIRBuilder.cpp
   HLFIRTools.cpp
diff --git a/flang/lib/Optimizer/Transforms/CUFCommon.cpp b/flang/lib/Optimizer/Builder/CUFCommon.cpp
similarity index 97%
rename from flang/lib/Optimizer/Transforms/CUFCommon.cpp
rename to flang/lib/Optimizer/Builder/CUFCommon.cpp
index bbe33217e8f455..81a8a90ce394e0 100644
--- a/flang/lib/Optimizer/Transforms/CUFCommon.cpp
+++ b/flang/lib/Optimizer/Builder/CUFCommon.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "flang/Optimizer/Transforms/CUFCommon.h"
+#include "flang/Optimizer/Builder/CUFCommon.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 9eafa4ec234bdd..d20d3bc4108ce9 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -9,7 +9,6 @@ add_flang_library(FIRTransforms
   CompilerGeneratedNames.cpp
   ConstantArgumentGlobalisation.cpp
   ControlFlowConverter.cpp
-  CUFCommon.cpp
   CUFAddConstructor.cpp
   CUFDeviceGlobal.cpp
   CUFOpConversion.cpp
diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
index 9591f48c5d4177..97551595db039c 100644
--- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/CUFCommon.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
 #include "flang/Optimizer/Builder/Todo.h"
@@ -19,7 +20,6 @@
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/Support/DataLayout.h"
-#include "flang/Optimizer/Transforms/CUFCommon.h"
 #include "flang/Runtime/CUDA/registration.h"
 #include "flang/Runtime/entry-names.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
index 07cc1f3b4b51ce..2e6c272fa90891 100644
--- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
@@ -7,12 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Common/Fortran.h"
+#include "flang/Optimizer/Builder/CUFCommon.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/InternalNames.h"
-#include "flang/Optimizer/Transforms/CUFCommon.h"
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/allocatable.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index de5c51556eecfc..fb0ef246546444 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -8,6 +8,7 @@
 
 #include "flang/Optimizer/Transforms/CUFOpConversion.h"
 #include "flang/Common/Fortran.h"
+#include "flang/Optimizer/Builder/CUFCommon.h"
 #include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
 #include "flang/Optimizer/CodeGen/TypeConverter.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
@@ -15,7 +16,6 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/DataLayout.h"
-#include "flang/Optimizer/Transforms/CUFCommon.h"
 #include "flang/Runtime/CUDA/allocatable.h"
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/CUDA/descriptor.h"
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index d3567f453fceb3..fa6a7b23624e84 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -24,6 +24,7 @@
 
 #include "flang/Common/Fortran.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/CUFCommon.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/LowLevelIntrinsics.h"
 #include "flang/Optimizer/Builder/Todo.h"
@@ -31,7 +32,6 @@
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
-#include "flang/Optimizer/Transforms/CUFCommon.h"
 #include "flang/Optimizer/Transforms/Passes.h"
 #include "flang/Optimizer/Transforms/Utils.h"
 #include "flang/Runtime/entry-names.h"
diff --git a/flang/test/Lower/CUDA/cuda-pointer-sync.cuf b/flang/test/Lower/CUDA/cuda-pointer-sync.cuf
new file mode 100644
index 00000000000000..e17869b2d63573
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-pointer-sync.cuf
@@ -0,0 +1,17 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+module devptr
+  real, device, pointer, dimension(:) :: dev_ptr
+end module
+
+use devptr
+real, device, target, dimension(4) :: a_dev
+a_dev = 42.0
+dev_ptr => a_dev
+end
+
+! CHECK: fir.global @_QMdevptrEdev_ptr {data_attr = #cuf.cuda<device>} : !fir.box<!fir.ptr<!fir.array<?xf32>>>
+! CHECK-LABEL: func.func @_QQmain()
+! CHECK: fir.embox
+! CHECK: fir.store
+! CHECK: cuf.sync_descriptor @_QMdevptrEdev_ptr



More information about the flang-commits mailing list