[flang-commits] [flang] [flang][MLIR][OpenMP] Extend delayed privatization for arrays (PR #85023)

Kareem Ergawy via flang-commits flang-commits at lists.llvm.org
Wed Mar 13 01:48:42 PDT 2024


https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/85023

>From 5f37252e1f70e723a3244854b22bbc98f58648f4 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 11 Mar 2024 04:39:51 -0500
Subject: [PATCH 1/4] [flang][MLIR][OpenMP] Extend delayed privatization for
 scalar allocatables

One more step in extending support for delayed privatization. This diff
adds support for scalar allocatables.
---
 .../flang/Optimizer/Builder/BoxValue.h        | 11 +++++
 flang/lib/Lower/Bridge.cpp                    | 24 ++++++-----
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp |  8 ++--
 ...privatization-allocatable-firstprivate.f90 | 33 +++++++++++++++
 ...ayed-privatization-allocatable-private.f90 | 40 +++++++++++++++++++
 5 files changed, 103 insertions(+), 13 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90

diff --git a/flang/include/flang/Optimizer/Builder/BoxValue.h b/flang/include/flang/Optimizer/Builder/BoxValue.h
index 2fed2d48a7a080..9b84bfb5ab0bf7 100644
--- a/flang/include/flang/Optimizer/Builder/BoxValue.h
+++ b/flang/include/flang/Optimizer/Builder/BoxValue.h
@@ -535,6 +535,17 @@ class ExtendedValue : public details::matcher<ExtendedValue> {
 
   const VT &matchee() const { return box; }
 
+  /// Clone an ExtendedValue to a new instance changing the base address.
+  ///
+  /// TODO So far, this is only a shallow clone; only the base address is
+  /// replaced. This will probably be extended to implement deep cloning to
+  /// support scenarios such as delayed privatization for ArrayBoxValue's.
+  ExtendedValue clone(mlir::Value newBase) const {
+    return match(
+        [&](const UnboxedValue &box) -> ExtendedValue { return newBase; },
+        [&](const auto &box) -> ExtendedValue { return box.clone(newBase); });
+  }
+
 private:
   VT box;
 };
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a668ba4116faab..5fa914d087f020 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -748,7 +748,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
   void copyVar(mlir::Location loc, mlir::Value dst,
                mlir::Value src) override final {
-    copyVarHLFIR(loc, dst, src);
+    copyVarHLFIR(loc, Fortran::lower::SymbolBox::Intrinsic{dst},
+                 Fortran::lower::SymbolBox::Intrinsic{src});
   }
 
   void copyHostAssociateVar(
@@ -1009,10 +1010,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       // `omp.private`'s `alloc` block. If this is the case, we return this
       // `SymbolBox::Intrinsic` value.
       if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
-        return v.match(
-            [&](const Fortran::lower::SymbolBox::Intrinsic &)
-                -> Fortran::lower::SymbolBox { return v; },
-            [](const auto &) -> Fortran::lower::SymbolBox { return {}; });
+        return v;
 
       return {};
     }
@@ -1060,15 +1058,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
                const Fortran::lower::SymbolBox &rhs_sb) {
     mlir::Location loc = genLocation(sym.name());
     if (lowerToHighLevelFIR())
-      copyVarHLFIR(loc, lhs_sb.getAddr(), rhs_sb.getAddr());
+      copyVarHLFIR(loc, lhs_sb, rhs_sb);
     else
       copyVarFIR(loc, sym, lhs_sb, rhs_sb);
   }
 
-  void copyVarHLFIR(mlir::Location loc, mlir::Value dst, mlir::Value src) {
+  void copyVarHLFIR(mlir::Location loc, Fortran::lower::SymbolBox dst,
+                    Fortran::lower::SymbolBox src) {
     assert(lowerToHighLevelFIR());
-    hlfir::Entity lhs{dst};
-    hlfir::Entity rhs{src};
+    hlfir::Entity lhs{dst.getAddr()};
+    hlfir::Entity rhs{src.getAddr()};
     // Temporary_lhs is set to true in hlfir.assign below to avoid user
     // assignment to be used and finalization to be called on the LHS.
     // This may or may not be correct but mimics the current behaviour
@@ -1082,7 +1081,12 @@ class FirConverter : public Fortran::lower::AbstractConverter {
           /*keepLhsLengthInAllocatableAssignment=*/false,
           /*temporary_lhs=*/true);
     };
-    if (lhs.isAllocatable()) {
+
+    bool isBoxAllocatable = dst.match(
+        [](const fir::MutableBoxValue &box) { return box.isAllocatable(); },
+        [](const auto &box) { return false; });
+
+    if (isBoxAllocatable) {
       // Deep copy allocatable if it is allocated.
       // Note that when allocated, the RHS is already allocated with the LHS
       // shape for copy on entry in createHostAssociateVarClone.
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 717b8cc0276a30..bee533949c6564 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -50,6 +50,7 @@ void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
 }
 
 void DataSharingProcessor::insertDeallocs() {
+  // TODO Extend delayed privatization to include a `dealloc` region?
   for (const Fortran::semantics::Symbol *sym : privatizedSymbols)
     if (Fortran::semantics::IsAllocatable(sym->GetUltimate())) {
       converter.createHostAssociateVarCloneDealloc(*sym);
@@ -376,6 +377,7 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
         symLoc, uniquePrivatizerName, symType,
         isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
                        : mlir::omp::DataSharingClauseType::Private);
+    fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
 
     symTable->pushScope();
 
@@ -386,7 +388,7 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
           &allocRegion, /*insertPt=*/{}, symType, symLoc);
 
       firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
-      symTable->addSymbol(*sym, allocRegion.getArgument(0));
+      symTable->addSymbol(*sym, symExV.clone(allocRegion.getArgument(0)));
       symTable->pushScope();
       cloneSymbol(sym);
       firOpBuilder.create<mlir::omp::YieldOp>(
@@ -403,10 +405,10 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
       mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
           &copyRegion, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
       firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
-      symTable->addSymbol(*sym, copyRegion.getArgument(0),
+      symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(0)),
                           /*force=*/true);
       symTable->pushScope();
-      symTable->addSymbol(*sym, copyRegion.getArgument(1));
+      symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(1)));
       auto ip = firOpBuilder.saveInsertionPoint();
       copyFirstPrivateSymbol(sym, &ip);
 
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
new file mode 100644
index 00000000000000..545221b4c1abfe
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
@@ -0,0 +1,33 @@
+! Test delayed privatization for allocatables: `firstprivate`.
+
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_allocatable
+  implicit none
+  integer, allocatable :: var1
+
+!$omp parallel firstprivate(var1)
+  var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT:  %[[PRIV_BASE_VAL:.*]] = fir.load %[[PRIV_PRIV_ARG]]
+! CHECK-NEXT:  %[[PRIV_BASE_BOX:.*]] = fir.box_addr %[[PRIV_BASE_VAL]]
+! CHECK-NEXT:  %[[PRIV_BASE_ADDR:.*]] = fir.convert %[[PRIV_BASE_BOX]]
+! CHECK-NEXT:  %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT:  %[[COPY_COND:.*]] = arith.cmpi ne, %[[PRIV_BASE_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT:  fir.if %[[COPY_COND]] {
+! CHECK-NEXT:    %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+! CHECK-NEXT:    %[[ORIG_BASE_ADDR:.*]] = fir.box_addr %[[ORIG_BASE_VAL]]
+! CHECK-NEXT:    %[[ORIG_BASE_LD:.*]] = fir.load %[[ORIG_BASE_ADDR]]
+! CHECK-NEXT:    hlfir.assign %[[ORIG_BASE_LD]] to %[[PRIV_BASE_BOX]] temporary_lhs
+! CHECK-NEXT:  }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
new file mode 100644
index 00000000000000..83fb516879ff20
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
@@ -0,0 +1,40 @@
+! Test delayed privatization for allocatables: `private`.
+
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+
+subroutine delayed_privatization_allocatable
+  implicit none
+  integer, allocatable :: var1
+
+!$omp parallel private(var1)
+  var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT:   %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_allocatableEvar1"}
+
+! CHECK-NEXT:   %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT:   %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK-NEXT:   %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64
+! CHECK-NEXT:   %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT:   %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT:   fir.if %[[ALLOC_COND]] {
+! CHECK-NEXT:     %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFdelayed_privatization_allocatableEvar1.alloc"}
+! CHECK-NEXT:     %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT:     fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT:   } else {
+! CHECK-NEXT:     %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32>
+! CHECK-NEXT:     %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT:     fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT:   }
+
+! CHECK-NEXT:   %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK-NEXT:   omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! CHECK-NEXT: }

>From 0888d7a4736c3eac4f4990d1b2317f61b4fc6e56 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Tue, 12 Mar 2024 03:02:33 -0500
Subject: [PATCH 2/4] Handle review comments.

---
 flang/include/flang/Optimizer/Builder/BoxValue.h      | 11 -----------
 flang/lib/Lower/OpenMP/DataSharingProcessor.cpp       | 11 +++++++----
 ...delayed-privatization-allocatable-firstprivate.f90 |  5 ++++-
 .../delayed-privatization-allocatable-private.f90     |  5 ++++-
 4 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/BoxValue.h b/flang/include/flang/Optimizer/Builder/BoxValue.h
index 9b84bfb5ab0bf7..2fed2d48a7a080 100644
--- a/flang/include/flang/Optimizer/Builder/BoxValue.h
+++ b/flang/include/flang/Optimizer/Builder/BoxValue.h
@@ -535,17 +535,6 @@ class ExtendedValue : public details::matcher<ExtendedValue> {
 
   const VT &matchee() const { return box; }
 
-  /// Clone an ExtendedValue to a new instance changing the base address.
-  ///
-  /// TODO So far, this is only a shallow clone; only the base address is
-  /// replaced. This will probably be extended to implement deep cloning to
-  /// support scenarios such as delayed privatization for ArrayBoxValue's.
-  ExtendedValue clone(mlir::Value newBase) const {
-    return match(
-        [&](const UnboxedValue &box) -> ExtendedValue { return newBase; },
-        [&](const auto &box) -> ExtendedValue { return box.clone(newBase); });
-  }
-
 private:
   VT box;
 };
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index bee533949c6564..ec391ec8b06f7e 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -50,7 +50,7 @@ void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
 }
 
 void DataSharingProcessor::insertDeallocs() {
-  // TODO Extend delayed privatization to include a `dealloc` region?
+  // TODO Extend delayed privatization to include a `dealloc` region.
   for (const Fortran::semantics::Symbol *sym : privatizedSymbols)
     if (Fortran::semantics::IsAllocatable(sym->GetUltimate())) {
       converter.createHostAssociateVarCloneDealloc(*sym);
@@ -388,7 +388,8 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
           &allocRegion, /*insertPt=*/{}, symType, symLoc);
 
       firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
-      symTable->addSymbol(*sym, symExV.clone(allocRegion.getArgument(0)));
+      symTable->addSymbol(*sym,
+                          fir::substBase(symExV, allocRegion.getArgument(0)));
       symTable->pushScope();
       cloneSymbol(sym);
       firOpBuilder.create<mlir::omp::YieldOp>(
@@ -405,10 +406,12 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
       mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
           &copyRegion, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
       firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
-      symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(0)),
+      symTable->addSymbol(*sym,
+                          fir::substBase(symExV, copyRegion.getArgument(0)),
                           /*force=*/true);
       symTable->pushScope();
-      symTable->addSymbol(*sym, symExV.clone(copyRegion.getArgument(1)));
+      symTable->addSymbol(*sym,
+                          fir::substBase(symExV, copyRegion.getArgument(1)));
       auto ip = firOpBuilder.saveInsertionPoint();
       copyFirstPrivateSymbol(sym, &ip);
 
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
index 545221b4c1abfe..aa835f82b2730b 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
@@ -1,6 +1,9 @@
 ! Test delayed privatization for allocatables: `firstprivate`.
 
-! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN:   -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN:   FileCheck %s
 
 subroutine delayed_privatization_allocatable
   implicit none
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
index 83fb516879ff20..cc1818b00b809c 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
@@ -1,6 +1,9 @@
 ! Test delayed privatization for allocatables: `private`.
 
-! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN:   -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN:   FileCheck %s
 
 subroutine delayed_privatization_allocatable
   implicit none

>From 306e090d531cc424d0d4495b4fa3b2b993c5a7c3 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Tue, 12 Mar 2024 04:21:01 -0500
Subject: [PATCH 3/4] Add support for scalar pointers.

---
 flang/lib/Lower/Bridge.cpp                    | 12 ++++++-
 .../OpenMP/delayed-privatization-pointer.f90  | 31 +++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-pointer.f90

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 5fa914d087f020..bc2e2adcb57c7b 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1084,6 +1084,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
     bool isBoxAllocatable = dst.match(
         [](const fir::MutableBoxValue &box) { return box.isAllocatable(); },
+        [](const fir::FortranVariableOpInterface &box) {
+          return fir::FortranVariableOpInterface(box).isAllocatable();
+        },
+        [](const auto &box) { return false; });
+
+    bool isBoxPointer = dst.match(
+        [](const fir::MutableBoxValue &box) { return box.isPointer(); },
+        [](const fir::FortranVariableOpInterface &box) {
+          return fir::FortranVariableOpInterface(box).isPointer();
+        },
         [](const auto &box) { return false; });
 
     if (isBoxAllocatable) {
@@ -1101,7 +1111,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
             copyData(lhs, rhs);
           })
           .end();
-    } else if (lhs.isPointer()) {
+    } else if (isBoxPointer) {
       // Set LHS target to the target of RHS (do not copy the RHS
       // target data into the LHS target storage).
       auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90 b/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
new file mode 100644
index 00000000000000..796e4720c8c954
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
@@ -0,0 +1,31 @@
+! Test delayed privatization for pointers: `private`.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN:   -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN:   FileCheck %s
+
+subroutine delayed_privatization_pointer
+  implicit none
+  integer, pointer :: var1
+
+!$omp parallel firstprivate(var1)
+  var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.ptr<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT:   %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_pointerEvar1"}
+! CHECK-NEXT:   %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK-NEXT:   omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! CHECK-NEXT: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT:    %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+ ! CHECK-NEXT:   fir.store %[[ORIG_BASE_VAL]] to %[[PRIV_PRIV_ARG]] : !fir.ref<!fir.box<!fir.ptr<i32>>>
+! CHECK-NEXT:   omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
+! CHECK-NEXT: }

>From 523850ee096280a67e92e8f5f5ac3f0198bc95a3 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 13 Mar 2024 00:15:46 -0500
Subject: [PATCH 4/4] [flang][MLIR][OpenMP] Extend delayed privatization for
 arrays

One more step in delayed privatization, this PR extends support for
arrays. In the delayed privatizer, a `fir.box_dims` operation is emitted
to retrieve the array bounds and extents. The result of these
`fir.box_dims` and the privatizer argument, are then used to create a
new `ExtendedValue`.
---
 flang/lib/Lower/Bridge.cpp                    |  2 +
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp | 27 ++++++-
 ...elayed-privatization-allocatable-array.f90 | 67 +++++++++++++++++
 .../OpenMP/delayed-privatization-array.f90    | 75 +++++++++++++++++++
 4 files changed, 169 insertions(+), 2 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90
 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-array.f90

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index bc2e2adcb57c7b..7c2c88246322a1 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -620,6 +620,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     mlir::Type hSymType = genType(hsym);
     Fortran::lower::SymbolBox hsb =
         lookupSymbol(hsym, /*symMap=*/nullptr, /*forceHlfirBase=*/true);
+    llvm::errs() << ">>>> hsb: " << hsb << "\n";
 
     auto allocate = [&](llvm::ArrayRef<mlir::Value> shape,
                         llvm::ArrayRef<mlir::Value> typeParams) -> mlir::Value {
@@ -635,6 +636,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     };
 
     fir::ExtendedValue hexv = symBoxToExtendedValue(hsb);
+    llvm::errs() << ">>>> hexv: " << hexv << "\n";
     fir::ExtendedValue exv = hexv.match(
         [&](const fir::BoxValue &box) -> fir::ExtendedValue {
           const Fortran::semantics::DeclTypeSpec *type = sym.GetType();
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index ec391ec8b06f7e..2412c13e938075 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -388,8 +388,31 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
           &allocRegion, /*insertPt=*/{}, symType, symLoc);
 
       firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
-      symTable->addSymbol(*sym,
-                          fir::substBase(symExV, allocRegion.getArgument(0)));
+
+      fir::ExtendedValue localExV = symExV.match(
+          [&](const fir::ArrayBoxValue &box) -> fir::ExtendedValue {
+            auto idxTy = firOpBuilder.getIndexType();
+            llvm::SmallVector<mlir::Value> extents;
+            llvm::SmallVector<mlir::Value> lBounds;
+
+            for (unsigned dim = 0; dim < box.getExtents().size(); ++dim) {
+              mlir::Value dimVal =
+                  firOpBuilder.createIntegerConstant(symLoc, idxTy, dim);
+              fir::BoxDimsOp dimInfo = firOpBuilder.create<fir::BoxDimsOp>(
+                  symLoc, idxTy, idxTy, idxTy, allocRegion.getArgument(0),
+                  dimVal);
+              extents.push_back(dimInfo.getExtent());
+              lBounds.push_back(dimInfo.getLowerBound());
+            }
+
+            return fir::ArrayBoxValue(allocRegion.getArgument(0), extents,
+                                      lBounds);
+          },
+          [&](const auto &box) -> fir::ExtendedValue {
+            return fir::substBase(symExV, allocRegion.getArgument(0));
+          });
+
+      symTable->addSymbol(*sym, localExV);
       symTable->pushScope();
       cloneSymbol(sym);
       firOpBuilder.create<mlir::omp::YieldOp>(
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90
new file mode 100644
index 00000000000000..47e163014fe868
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90
@@ -0,0 +1,67 @@
+! Test delayed privatization for allocatable arrays.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN:   -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN:   FileCheck %s
+
+subroutine delayed_privatization_private(var1, l1)
+  implicit none
+  integer(8):: l1
+  integer, allocatable, dimension(:) :: var1
+
+!$omp parallel firstprivate(var1)
+  var1(l1 + 1) = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<!fir.array<\?xi32>>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT:   %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<{{\?}}xi32>>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+
+! CHECK-NEXT:   %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]]
+! CHECK-NEXT:   %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]]
+! CHECK-NEXT:   %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]]
+! CHECK-NEXT:   %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT:   %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT:   fir.if %[[ALLOC_COND]] {
+! CHECK-NEXT:     %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : [[TYPE]]
+! CHECK-NEXT:     %[[C0:.*]] = arith.constant 0 : index
+! CHECK-NEXT:     %[[DIMS:.*]]:3 = fir.box_dims %[[PRIV_ARG_VAL]], %[[C0]]
+! CHECK-NEXT:     fir.box_addr %[[PRIV_ARG_VAL]]
+! CHECK-NEXT:     %[[C0_2:.*]] = arith.constant 0 : index 
+! CHECK-NEXT:     %[[CMP:.*]] = arith.cmpi sgt, %[[DIMS]]#1, %[[C0_2]] : index
+! CHECK-NEXT:     %[[SELECT:.*]] = arith.select %[[CMP]], %[[DIMS]]#1, %[[C0_2]] : index
+! CHECK-NEXT:     %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[SELECT]]
+! CHECK-NEXT:     %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS]]#0, %[[SELECT]] : (index, index) -> !fir.shapeshift<1>
+! CHECK-NEXT:     %[[EMBOX:.*]] = fir.embox %[[MEM]](%[[SHAPE_SHIFT]])
+! CHECK-NEXT:     fir.store %[[EMBOX]] to %[[PRIV_ALLOC]]
+! CHECK-NEXT:   } else {
+! CHECK-NEXT:     %[[ZEROS:.*]] = fir.zero_bits
+! CHECK-NEXT:     %[[C0_3:.*]] = arith.constant 0 : index
+! CHECK-NEXT:     %[[SHAPE:.*]] = fir.shape %[[C0_3]] : (index) -> !fir.shape<1>
+! CHECK-NEXT:     %[[EMBOX_2:.*]] = fir.embox %[[ZEROS]](%[[SHAPE]])
+! CHECK-NEXT:     fir.store %[[EMBOX_2]] to %[[PRIV_ALLOC]]
+! CHECK-NEXT:   }
+
+! CHECK-NEXT:   hlfir.declare
+! CHECK-NEXT:   omp.yield
+
+! CHECK-NEXT: } copy {
+! CHECK-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT:  %[[PRIV_BASE_VAL:.*]] = fir.load %[[PRIV_PRIV_ARG]]
+! CHECK-NEXT:  %[[PRIV_BASE_BOX:.*]] = fir.box_addr %[[PRIV_BASE_VAL]]
+! CHECK-NEXT:  %[[PRIV_BASE_ADDR:.*]] = fir.convert %[[PRIV_BASE_BOX]]
+! CHECK-NEXT:  %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT:  %[[COPY_COND:.*]] = arith.cmpi ne, %[[PRIV_BASE_ADDR]], %[[C0]] : i64
+
+
+! CHECK-NEXT:  fir.if %[[COPY_COND]] {
+! CHECK-NEXT:    %[[PRIV_ORIG_ARG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+! CHECK-NEXT:    hlfir.assign %[[PRIV_ORIG_ARG_VAL]] to %[[PRIV_BASE_VAL]] temporary_lhs
+! CHECK-NEXT:   }
+! CHECK-NEXT:   omp.yield
+! CHECK-NEXT: }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-array.f90 b/flang/test/Lower/OpenMP/delayed-privatization-array.f90
new file mode 100644
index 00000000000000..23b61563781eb1
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-array.f90
@@ -0,0 +1,75 @@
+! Test delayed privatization for arrays.
+
+! RUN: split-file %s %t
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN:   -o - %t/one_dim_array.f90 2>&1 | FileCheck %s --check-prefix=ONE_DIM
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - \
+! RUN:   %t/one_dim_array.f90 2>&1 | FileCheck %s --check-prefix=ONE_DIM
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN:   -o - %t/two_dim_array.f90 2>&1 | FileCheck %s --check-prefix=TWO_DIM
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %t/two_dim_array.f90 2>&1 |\
+! RUN:   FileCheck %s --check-prefix=TWO_DIM
+
+!--- one_dim_array.f90
+subroutine delayed_privatization_private(var1, l1, u1)
+  implicit none
+  integer(8):: l1, u1
+  integer, dimension(l1:u1) :: var1
+
+!$omp parallel firstprivate(var1)
+  var1(l1 + 1) = 10
+!$omp end parallel
+end subroutine
+
+! ONE_DIM-LABEL: omp.private {type = firstprivate}
+! ONE_DIM-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.box<!fir.array<\?xi32>>]] alloc {
+
+! ONE_DIM-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! ONE_DIM-NEXT:   %[[C0:.*]] = arith.constant 0 : index
+! ONE_DIM-NEXT:   %[[DIMS:.*]]:3 = fir.box_dims %[[PRIV_ARG]], %[[C0]] : ([[TYPE]], index) -> (index, index, index)
+! ONE_DIM-NEXT:   %[[PRIV_ALLOCA:.*]] = fir.alloca !fir.array<{{\?}}xi32>, %[[DIMS]]#1 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! ONE_DIM-NEXT:   %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS]]#0, %[[DIMS]]#1 : (index, index) -> !fir.shapeshift<1>
+! ONE_DIM-NEXT:   %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOCA]](%[[SHAPE_SHIFT]]) {uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! ONE_DIM-NEXT:  omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! ONE_DIM-NEXT: } copy {
+! ONE_DIM-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! ONE_DIM-NEXT:  hlfir.assign %[[PRIV_ORIG_ARG]] to %[[PRIV_PRIV_ARG]] temporary_lhs
+! ONE_DIM-NEXT:   omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
+! ONE_DIM-NEXT: }
+
+!--- two_dim_array.f90
+subroutine delayed_privatization_private(var1, l1, u1, l2, u2)
+  implicit none
+  integer(8):: l1, u1, l2, u2
+  integer, dimension(l1:u1, l2:u2) :: var1
+
+!$omp parallel firstprivate(var1)
+  var1(l1 + 1, u2) = 10
+!$omp end parallel
+end subroutine
+
+! TWO_DIM-LABEL: omp.private {type = firstprivate}
+! TWO_DIM-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.box<!fir.array<\?x\?xi32>>]] alloc {
+
+! TWO_DIM-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+! TWO_DIM-NEXT:   %[[C0:.*]] = arith.constant 0 : index
+! TWO_DIM-NEXT:   %[[DIMS0:.*]]:3 = fir.box_dims %[[PRIV_ARG]], %[[C0]] : ([[TYPE]], index) -> (index, index, index)
+
+! TWO_DIM-NEXT:   %[[C1:.*]] = arith.constant 1 : index
+! TWO_DIM-NEXT:   %[[DIMS1:.*]]:3 = fir.box_dims %[[PRIV_ARG]], %[[C1]] : ([[TYPE]], index) -> (index, index, index)
+
+! TWO_DIM-NEXT:   %[[PRIV_ALLOCA:.*]] = fir.alloca !fir.array<{{\?}}x{{\?}}xi32>, %[[DIMS0]]#1, %[[DIMS1]]#1 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! TWO_DIM-NEXT:   %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS0]]#0, %[[DIMS0]]#1, %[[DIMS1]]#0, %[[DIMS1]]#1 : (index, index, index, index) -> !fir.shapeshift<2>
+
+! TWO_DIM-NEXT:   %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOCA]](%[[SHAPE_SHIFT]]) {uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! TWO_DIM-NEXT:  omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! TWO_DIM-NEXT: } copy {
+! TWO_DIM-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! TWO_DIM-NEXT:  hlfir.assign %[[PRIV_ORIG_ARG]] to %[[PRIV_PRIV_ARG]] temporary_lhs
+! TWO_DIM-NEXT:   omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
+! TWO_DIM-NEXT: }



More information about the flang-commits mailing list