[flang-commits] [flang] [flang][MLIR][OpenMP] Extend delayed privatization for scalar allocatables and pointers (PR #84740)

Kareem Ergawy via flang-commits flang-commits at lists.llvm.org
Fri Mar 15 06:48:52 PDT 2024


https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/84740

>From 781604993a04004fd85a34f08aef46f9d97f8abf Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 11 Mar 2024 04:39:51 -0500
Subject: [PATCH] [flang][MLIR][OpenMP] Extend delayed privatization for scalar
 allocatables and pointers

One more step in extending support for delayed privatization. This diff
adds support for scalar allocatables and pointers.
---
 flang/lib/Lower/Bridge.cpp                    | 36 +++++++++++-----
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp | 11 +++--
 ...privatization-allocatable-firstprivate.f90 | 36 ++++++++++++++++
 ...ayed-privatization-allocatable-private.f90 | 43 +++++++++++++++++++
 .../OpenMP/delayed-privatization-pointer.f90  | 31 +++++++++++++
 5 files changed, 143 insertions(+), 14 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-pointer.f90

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a668ba4116faab..bc2e2adcb57c7b 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -748,7 +748,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
   void copyVar(mlir::Location loc, mlir::Value dst,
                mlir::Value src) override final {
-    copyVarHLFIR(loc, dst, src);
+    copyVarHLFIR(loc, Fortran::lower::SymbolBox::Intrinsic{dst},
+                 Fortran::lower::SymbolBox::Intrinsic{src});
   }
 
   void copyHostAssociateVar(
@@ -1009,10 +1010,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       // `omp.private`'s `alloc` block. If this is the case, we return this
       // `SymbolBox::Intrinsic` value.
       if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
-        return v.match(
-            [&](const Fortran::lower::SymbolBox::Intrinsic &)
-                -> Fortran::lower::SymbolBox { return v; },
-            [](const auto &) -> Fortran::lower::SymbolBox { return {}; });
+        return v;
 
       return {};
     }
@@ -1060,15 +1058,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
                const Fortran::lower::SymbolBox &rhs_sb) {
     mlir::Location loc = genLocation(sym.name());
     if (lowerToHighLevelFIR())
-      copyVarHLFIR(loc, lhs_sb.getAddr(), rhs_sb.getAddr());
+      copyVarHLFIR(loc, lhs_sb, rhs_sb);
     else
       copyVarFIR(loc, sym, lhs_sb, rhs_sb);
   }
 
-  void copyVarHLFIR(mlir::Location loc, mlir::Value dst, mlir::Value src) {
+  void copyVarHLFIR(mlir::Location loc, Fortran::lower::SymbolBox dst,
+                    Fortran::lower::SymbolBox src) {
     assert(lowerToHighLevelFIR());
-    hlfir::Entity lhs{dst};
-    hlfir::Entity rhs{src};
+    hlfir::Entity lhs{dst.getAddr()};
+    hlfir::Entity rhs{src.getAddr()};
     // Temporary_lhs is set to true in hlfir.assign below to avoid user
     // assignment to be used and finalization to be called on the LHS.
     // This may or may not be correct but mimics the current behaviour
@@ -1082,7 +1081,22 @@ class FirConverter : public Fortran::lower::AbstractConverter {
           /*keepLhsLengthInAllocatableAssignment=*/false,
           /*temporary_lhs=*/true);
     };
-    if (lhs.isAllocatable()) {
+
+    bool isBoxAllocatable = dst.match(
+        [](const fir::MutableBoxValue &box) { return box.isAllocatable(); },
+        [](const fir::FortranVariableOpInterface &box) {
+          return fir::FortranVariableOpInterface(box).isAllocatable();
+        },
+        [](const auto &box) { return false; });
+
+    bool isBoxPointer = dst.match(
+        [](const fir::MutableBoxValue &box) { return box.isPointer(); },
+        [](const fir::FortranVariableOpInterface &box) {
+          return fir::FortranVariableOpInterface(box).isPointer();
+        },
+        [](const auto &box) { return false; });
+
+    if (isBoxAllocatable) {
       // Deep copy allocatable if it is allocated.
       // Note that when allocated, the RHS is already allocated with the LHS
       // shape for copy on entry in createHostAssociateVarClone.
@@ -1097,7 +1111,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
             copyData(lhs, rhs);
           })
           .end();
-    } else if (lhs.isPointer()) {
+    } else if (isBoxPointer) {
       // Set LHS target to the target of RHS (do not copy the RHS
       // target data into the LHS target storage).
       auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 717b8cc0276a30..ec391ec8b06f7e 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -50,6 +50,7 @@ void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
 }
 
 void DataSharingProcessor::insertDeallocs() {
+  // TODO Extend delayed privatization to include a `dealloc` region.
   for (const Fortran::semantics::Symbol *sym : privatizedSymbols)
     if (Fortran::semantics::IsAllocatable(sym->GetUltimate())) {
       converter.createHostAssociateVarCloneDealloc(*sym);
@@ -376,6 +377,7 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
         symLoc, uniquePrivatizerName, symType,
         isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
                        : mlir::omp::DataSharingClauseType::Private);
+    fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
 
     symTable->pushScope();
 
@@ -386,7 +388,8 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
           &allocRegion, /*insertPt=*/{}, symType, symLoc);
 
       firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
-      symTable->addSymbol(*sym, allocRegion.getArgument(0));
+      symTable->addSymbol(*sym,
+                          fir::substBase(symExV, allocRegion.getArgument(0)));
       symTable->pushScope();
       cloneSymbol(sym);
       firOpBuilder.create<mlir::omp::YieldOp>(
@@ -403,10 +406,12 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
       mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
           &copyRegion, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
       firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
-      symTable->addSymbol(*sym, copyRegion.getArgument(0),
+      symTable->addSymbol(*sym,
+                          fir::substBase(symExV, copyRegion.getArgument(0)),
                           /*force=*/true);
       symTable->pushScope();
-      symTable->addSymbol(*sym, copyRegion.getArgument(1));
+      symTable->addSymbol(*sym,
+                          fir::substBase(symExV, copyRegion.getArgument(1)));
       auto ip = firOpBuilder.saveInsertionPoint();
       copyFirstPrivateSymbol(sym, &ip);
 
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
new file mode 100644
index 00000000000000..aa835f82b2730b
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
@@ -0,0 +1,36 @@
+! Test delayed privatization for allocatables: `firstprivate`.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN:   -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN:   FileCheck %s
+
+subroutine delayed_privatization_allocatable
+  implicit none
+  integer, allocatable :: var1
+
+!$omp parallel firstprivate(var1)
+  var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT:  %[[PRIV_BASE_VAL:.*]] = fir.load %[[PRIV_PRIV_ARG]]
+! CHECK-NEXT:  %[[PRIV_BASE_BOX:.*]] = fir.box_addr %[[PRIV_BASE_VAL]]
+! CHECK-NEXT:  %[[PRIV_BASE_ADDR:.*]] = fir.convert %[[PRIV_BASE_BOX]]
+! CHECK-NEXT:  %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT:  %[[COPY_COND:.*]] = arith.cmpi ne, %[[PRIV_BASE_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT:  fir.if %[[COPY_COND]] {
+! CHECK-NEXT:    %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+! CHECK-NEXT:    %[[ORIG_BASE_ADDR:.*]] = fir.box_addr %[[ORIG_BASE_VAL]]
+! CHECK-NEXT:    %[[ORIG_BASE_LD:.*]] = fir.load %[[ORIG_BASE_ADDR]]
+! CHECK-NEXT:    hlfir.assign %[[ORIG_BASE_LD]] to %[[PRIV_BASE_BOX]] temporary_lhs
+! CHECK-NEXT:  }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
new file mode 100644
index 00000000000000..cc1818b00b809c
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
@@ -0,0 +1,43 @@
+! Test delayed privatization for allocatables: `private`.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN:   -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN:   FileCheck %s
+
+subroutine delayed_privatization_allocatable
+  implicit none
+  integer, allocatable :: var1
+
+!$omp parallel private(var1)
+  var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT:   %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_allocatableEvar1"}
+
+! CHECK-NEXT:   %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT:   %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK-NEXT:   %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64
+! CHECK-NEXT:   %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT:   %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT:   fir.if %[[ALLOC_COND]] {
+! CHECK-NEXT:     %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFdelayed_privatization_allocatableEvar1.alloc"}
+! CHECK-NEXT:     %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT:     fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT:   } else {
+! CHECK-NEXT:     %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32>
+! CHECK-NEXT:     %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT:     fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT:   }
+
+! CHECK-NEXT:   %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK-NEXT:   omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! CHECK-NEXT: }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90 b/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
new file mode 100644
index 00000000000000..796e4720c8c954
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
@@ -0,0 +1,31 @@
+! Test delayed privatization for pointers: `private`.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN:   -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN:   FileCheck %s
+
+subroutine delayed_privatization_pointer
+  implicit none
+  integer, pointer :: var1
+
+!$omp parallel firstprivate(var1)
+  var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.ptr<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT:   %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_pointerEvar1"}
+! CHECK-NEXT:   %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK-NEXT:   omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! CHECK-NEXT: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT:    %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+ ! CHECK-NEXT:   fir.store %[[ORIG_BASE_VAL]] to %[[PRIV_PRIV_ARG]] : !fir.ref<!fir.box<!fir.ptr<i32>>>
+! CHECK-NEXT:   omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
+! CHECK-NEXT: }



More information about the flang-commits mailing list