[flang-commits] [flang] [flang][MLIR][OpenMP] Extend delayed privatization for `CHARACTER` (PR #85369)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Fri Mar 15 00:48:38 PDT 2024
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/85369
>From 6c557cf67f9d58bf9a8db9ece8458fc1075c24d3 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Mon, 11 Mar 2024 04:39:51 -0500
Subject: [PATCH 1/3] [flang][MLIR][OpenMP] Extend delayed privatization for
scalar allocatables
One more step in extending support for delayed privatization. This diff
adds support for scalar allocatables.
---
flang/lib/Lower/Bridge.cpp | 36 +++++++++++-----
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 11 +++--
...privatization-allocatable-firstprivate.f90 | 36 ++++++++++++++++
...ayed-privatization-allocatable-private.f90 | 43 +++++++++++++++++++
.../OpenMP/delayed-privatization-pointer.f90 | 31 +++++++++++++
5 files changed, 143 insertions(+), 14 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a668ba4116faab..bc2e2adcb57c7b 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -748,7 +748,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
void copyVar(mlir::Location loc, mlir::Value dst,
mlir::Value src) override final {
- copyVarHLFIR(loc, dst, src);
+ copyVarHLFIR(loc, Fortran::lower::SymbolBox::Intrinsic{dst},
+ Fortran::lower::SymbolBox::Intrinsic{src});
}
void copyHostAssociateVar(
@@ -1009,10 +1010,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// `omp.private`'s `alloc` block. If this is the case, we return this
// `SymbolBox::Intrinsic` value.
if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
- return v.match(
- [&](const Fortran::lower::SymbolBox::Intrinsic &)
- -> Fortran::lower::SymbolBox { return v; },
- [](const auto &) -> Fortran::lower::SymbolBox { return {}; });
+ return v;
return {};
}
@@ -1060,15 +1058,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
const Fortran::lower::SymbolBox &rhs_sb) {
mlir::Location loc = genLocation(sym.name());
if (lowerToHighLevelFIR())
- copyVarHLFIR(loc, lhs_sb.getAddr(), rhs_sb.getAddr());
+ copyVarHLFIR(loc, lhs_sb, rhs_sb);
else
copyVarFIR(loc, sym, lhs_sb, rhs_sb);
}
- void copyVarHLFIR(mlir::Location loc, mlir::Value dst, mlir::Value src) {
+ void copyVarHLFIR(mlir::Location loc, Fortran::lower::SymbolBox dst,
+ Fortran::lower::SymbolBox src) {
assert(lowerToHighLevelFIR());
- hlfir::Entity lhs{dst};
- hlfir::Entity rhs{src};
+ hlfir::Entity lhs{dst.getAddr()};
+ hlfir::Entity rhs{src.getAddr()};
// Temporary_lhs is set to true in hlfir.assign below to avoid user
// assignment to be used and finalization to be called on the LHS.
// This may or may not be correct but mimics the current behaviour
@@ -1082,7 +1081,22 @@ class FirConverter : public Fortran::lower::AbstractConverter {
/*keepLhsLengthInAllocatableAssignment=*/false,
/*temporary_lhs=*/true);
};
- if (lhs.isAllocatable()) {
+
+ bool isBoxAllocatable = dst.match(
+ [](const fir::MutableBoxValue &box) { return box.isAllocatable(); },
+ [](const fir::FortranVariableOpInterface &box) {
+ return fir::FortranVariableOpInterface(box).isAllocatable();
+ },
+ [](const auto &box) { return false; });
+
+ bool isBoxPointer = dst.match(
+ [](const fir::MutableBoxValue &box) { return box.isPointer(); },
+ [](const fir::FortranVariableOpInterface &box) {
+ return fir::FortranVariableOpInterface(box).isPointer();
+ },
+ [](const auto &box) { return false; });
+
+ if (isBoxAllocatable) {
// Deep copy allocatable if it is allocated.
// Note that when allocated, the RHS is already allocated with the LHS
// shape for copy on entry in createHostAssociateVarClone.
@@ -1097,7 +1111,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
copyData(lhs, rhs);
})
.end();
- } else if (lhs.isPointer()) {
+ } else if (isBoxPointer) {
// Set LHS target to the target of RHS (do not copy the RHS
// target data into the LHS target storage).
auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 717b8cc0276a30..ec391ec8b06f7e 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -50,6 +50,7 @@ void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
}
void DataSharingProcessor::insertDeallocs() {
+ // TODO Extend delayed privatization to include a `dealloc` region.
for (const Fortran::semantics::Symbol *sym : privatizedSymbols)
if (Fortran::semantics::IsAllocatable(sym->GetUltimate())) {
converter.createHostAssociateVarCloneDealloc(*sym);
@@ -376,6 +377,7 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
symLoc, uniquePrivatizerName, symType,
isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
: mlir::omp::DataSharingClauseType::Private);
+ fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
symTable->pushScope();
@@ -386,7 +388,8 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
&allocRegion, /*insertPt=*/{}, symType, symLoc);
firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
- symTable->addSymbol(*sym, allocRegion.getArgument(0));
+ symTable->addSymbol(*sym,
+ fir::substBase(symExV, allocRegion.getArgument(0)));
symTable->pushScope();
cloneSymbol(sym);
firOpBuilder.create<mlir::omp::YieldOp>(
@@ -403,10 +406,12 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
©Region, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
- symTable->addSymbol(*sym, copyRegion.getArgument(0),
+ symTable->addSymbol(*sym,
+ fir::substBase(symExV, copyRegion.getArgument(0)),
/*force=*/true);
symTable->pushScope();
- symTable->addSymbol(*sym, copyRegion.getArgument(1));
+ symTable->addSymbol(*sym,
+ fir::substBase(symExV, copyRegion.getArgument(1)));
auto ip = firOpBuilder.saveInsertionPoint();
copyFirstPrivateSymbol(sym, &ip);
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
new file mode 100644
index 00000000000000..aa835f82b2730b
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90
@@ -0,0 +1,36 @@
+! Test delayed privatization for allocatables: `firstprivate`.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN: FileCheck %s
+
+subroutine delayed_privatization_allocatable
+ implicit none
+ integer, allocatable :: var1
+
+!$omp parallel firstprivate(var1)
+ var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[PRIV_BASE_VAL:.*]] = fir.load %[[PRIV_PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_BASE_BOX:.*]] = fir.box_addr %[[PRIV_BASE_VAL]]
+! CHECK-NEXT: %[[PRIV_BASE_ADDR:.*]] = fir.convert %[[PRIV_BASE_BOX]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[COPY_COND:.*]] = arith.cmpi ne, %[[PRIV_BASE_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[COPY_COND]] {
+! CHECK-NEXT: %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+! CHECK-NEXT: %[[ORIG_BASE_ADDR:.*]] = fir.box_addr %[[ORIG_BASE_VAL]]
+! CHECK-NEXT: %[[ORIG_BASE_LD:.*]] = fir.load %[[ORIG_BASE_ADDR]]
+! CHECK-NEXT: hlfir.assign %[[ORIG_BASE_LD]] to %[[PRIV_BASE_BOX]] temporary_lhs
+! CHECK-NEXT: }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
new file mode 100644
index 00000000000000..cc1818b00b809c
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-private.f90
@@ -0,0 +1,43 @@
+! Test delayed privatization for allocatables: `private`.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN: FileCheck %s
+
+subroutine delayed_privatization_allocatable
+ implicit none
+ integer, allocatable :: var1
+
+!$omp parallel private(var1)
+ var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_allocatableEvar1"}
+
+! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[ALLOC_COND]] {
+! CHECK-NEXT: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFdelayed_privatization_allocatableEvar1.alloc"}
+! CHECK-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: } else {
+! CHECK-NEXT: %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32>
+! CHECK-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+! CHECK-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK-NEXT: }
+
+! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! CHECK-NEXT: }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90 b/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
new file mode 100644
index 00000000000000..796e4720c8c954
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
@@ -0,0 +1,31 @@
+! Test delayed privatization for pointers: `private`.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN: FileCheck %s
+
+subroutine delayed_privatization_pointer
+ implicit none
+ integer, pointer :: var1
+
+!$omp parallel firstprivate(var1)
+ var1 = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.ptr<i32>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_pointerEvar1"}
+! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
+! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! CHECK-NEXT: } copy {
+! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT: %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+ ! CHECK-NEXT: fir.store %[[ORIG_BASE_VAL]] to %[[PRIV_PRIV_ARG]] : !fir.ref<!fir.box<!fir.ptr<i32>>>
+! CHECK-NEXT: omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
+! CHECK-NEXT: }
>From 7e48b5f8b64e61118a366a556c06d0d697c3db6f Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 13 Mar 2024 00:15:46 -0500
Subject: [PATCH 2/3] [flang][MLIR][OpenMP] Extend delayed privatization for
arrays
One more step in delayed privatization, this PR extends support for
arrays. In the delayed privatizer, a `fir.box_dims` operation is emitted
to retrieve the array bounds and extents. The result of these
`fir.box_dims` and the privatizer argument, are then used to create a
new `ExtendedValue`.
---
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 27 ++++++-
...elayed-privatization-allocatable-array.f90 | 67 +++++++++++++++++
.../OpenMP/delayed-privatization-array.f90 | 75 +++++++++++++++++++
3 files changed, 167 insertions(+), 2 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-array.f90
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index ec391ec8b06f7e..2412c13e938075 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -388,8 +388,31 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
&allocRegion, /*insertPt=*/{}, symType, symLoc);
firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
- symTable->addSymbol(*sym,
- fir::substBase(symExV, allocRegion.getArgument(0)));
+
+ fir::ExtendedValue localExV = symExV.match(
+ [&](const fir::ArrayBoxValue &box) -> fir::ExtendedValue {
+ auto idxTy = firOpBuilder.getIndexType();
+ llvm::SmallVector<mlir::Value> extents;
+ llvm::SmallVector<mlir::Value> lBounds;
+
+ for (unsigned dim = 0; dim < box.getExtents().size(); ++dim) {
+ mlir::Value dimVal =
+ firOpBuilder.createIntegerConstant(symLoc, idxTy, dim);
+ fir::BoxDimsOp dimInfo = firOpBuilder.create<fir::BoxDimsOp>(
+ symLoc, idxTy, idxTy, idxTy, allocRegion.getArgument(0),
+ dimVal);
+ extents.push_back(dimInfo.getExtent());
+ lBounds.push_back(dimInfo.getLowerBound());
+ }
+
+ return fir::ArrayBoxValue(allocRegion.getArgument(0), extents,
+ lBounds);
+ },
+ [&](const auto &box) -> fir::ExtendedValue {
+ return fir::substBase(symExV, allocRegion.getArgument(0));
+ });
+
+ symTable->addSymbol(*sym, localExV);
symTable->pushScope();
cloneSymbol(sym);
firOpBuilder.create<mlir::omp::YieldOp>(
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90
new file mode 100644
index 00000000000000..47e163014fe868
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-array.f90
@@ -0,0 +1,67 @@
+! Test delayed privatization for allocatable arrays.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
+! RUN: FileCheck %s
+
+subroutine delayed_privatization_private(var1, l1)
+ implicit none
+ integer(8):: l1
+ integer, allocatable, dimension(:) :: var1
+
+!$omp parallel firstprivate(var1)
+ var1(l1 + 1) = 10
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<!fir.array<\?xi32>>>>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<{{\?}}xi32>>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+
+! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]]
+! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64
+
+! CHECK-NEXT: fir.if %[[ALLOC_COND]] {
+! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : [[TYPE]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : index
+! CHECK-NEXT: %[[DIMS:.*]]:3 = fir.box_dims %[[PRIV_ARG_VAL]], %[[C0]]
+! CHECK-NEXT: fir.box_addr %[[PRIV_ARG_VAL]]
+! CHECK-NEXT: %[[C0_2:.*]] = arith.constant 0 : index
+! CHECK-NEXT: %[[CMP:.*]] = arith.cmpi sgt, %[[DIMS]]#1, %[[C0_2]] : index
+! CHECK-NEXT: %[[SELECT:.*]] = arith.select %[[CMP]], %[[DIMS]]#1, %[[C0_2]] : index
+! CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<?xi32>, %[[SELECT]]
+! CHECK-NEXT: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS]]#0, %[[SELECT]] : (index, index) -> !fir.shapeshift<1>
+! CHECK-NEXT: %[[EMBOX:.*]] = fir.embox %[[MEM]](%[[SHAPE_SHIFT]])
+! CHECK-NEXT: fir.store %[[EMBOX]] to %[[PRIV_ALLOC]]
+! CHECK-NEXT: } else {
+! CHECK-NEXT: %[[ZEROS:.*]] = fir.zero_bits
+! CHECK-NEXT: %[[C0_3:.*]] = arith.constant 0 : index
+! CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C0_3]] : (index) -> !fir.shape<1>
+! CHECK-NEXT: %[[EMBOX_2:.*]] = fir.embox %[[ZEROS]](%[[SHAPE]])
+! CHECK-NEXT: fir.store %[[EMBOX_2]] to %[[PRIV_ALLOC]]
+! CHECK-NEXT: }
+
+! CHECK-NEXT: hlfir.declare
+! CHECK-NEXT: omp.yield
+
+! CHECK-NEXT: } copy {
+! CHECK-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT: %[[PRIV_BASE_VAL:.*]] = fir.load %[[PRIV_PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_BASE_BOX:.*]] = fir.box_addr %[[PRIV_BASE_VAL]]
+! CHECK-NEXT: %[[PRIV_BASE_ADDR:.*]] = fir.convert %[[PRIV_BASE_BOX]]
+! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT: %[[COPY_COND:.*]] = arith.cmpi ne, %[[PRIV_BASE_ADDR]], %[[C0]] : i64
+
+
+! CHECK-NEXT: fir.if %[[COPY_COND]] {
+! CHECK-NEXT: %[[PRIV_ORIG_ARG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
+! CHECK-NEXT: hlfir.assign %[[PRIV_ORIG_ARG_VAL]] to %[[PRIV_BASE_VAL]] temporary_lhs
+! CHECK-NEXT: }
+! CHECK-NEXT: omp.yield
+! CHECK-NEXT: }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-array.f90 b/flang/test/Lower/OpenMP/delayed-privatization-array.f90
new file mode 100644
index 00000000000000..23b61563781eb1
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-array.f90
@@ -0,0 +1,75 @@
+! Test delayed privatization for arrays.
+
+! RUN: split-file %s %t
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %t/one_dim_array.f90 2>&1 | FileCheck %s --check-prefix=ONE_DIM
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - \
+! RUN: %t/one_dim_array.f90 2>&1 | FileCheck %s --check-prefix=ONE_DIM
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %t/two_dim_array.f90 2>&1 | FileCheck %s --check-prefix=TWO_DIM
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %t/two_dim_array.f90 2>&1 |\
+! RUN: FileCheck %s --check-prefix=TWO_DIM
+
+!--- one_dim_array.f90
+subroutine delayed_privatization_private(var1, l1, u1)
+ implicit none
+ integer(8):: l1, u1
+ integer, dimension(l1:u1) :: var1
+
+!$omp parallel firstprivate(var1)
+ var1(l1 + 1) = 10
+!$omp end parallel
+end subroutine
+
+! ONE_DIM-LABEL: omp.private {type = firstprivate}
+! ONE_DIM-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.box<!fir.array<\?xi32>>]] alloc {
+
+! ONE_DIM-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+
+! ONE_DIM-NEXT: %[[C0:.*]] = arith.constant 0 : index
+! ONE_DIM-NEXT: %[[DIMS:.*]]:3 = fir.box_dims %[[PRIV_ARG]], %[[C0]] : ([[TYPE]], index) -> (index, index, index)
+! ONE_DIM-NEXT: %[[PRIV_ALLOCA:.*]] = fir.alloca !fir.array<{{\?}}xi32>, %[[DIMS]]#1 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! ONE_DIM-NEXT: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS]]#0, %[[DIMS]]#1 : (index, index) -> !fir.shapeshift<1>
+! ONE_DIM-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOCA]](%[[SHAPE_SHIFT]]) {uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! ONE_DIM-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! ONE_DIM-NEXT: } copy {
+! ONE_DIM-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! ONE_DIM-NEXT: hlfir.assign %[[PRIV_ORIG_ARG]] to %[[PRIV_PRIV_ARG]] temporary_lhs
+! ONE_DIM-NEXT: omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
+! ONE_DIM-NEXT: }
+
+!--- two_dim_array.f90
+subroutine delayed_privatization_private(var1, l1, u1, l2, u2)
+ implicit none
+ integer(8):: l1, u1, l2, u2
+ integer, dimension(l1:u1, l2:u2) :: var1
+
+!$omp parallel firstprivate(var1)
+ var1(l1 + 1, u2) = 10
+!$omp end parallel
+end subroutine
+
+! TWO_DIM-LABEL: omp.private {type = firstprivate}
+! TWO_DIM-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.box<!fir.array<\?x\?xi32>>]] alloc {
+
+! TWO_DIM-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+! TWO_DIM-NEXT: %[[C0:.*]] = arith.constant 0 : index
+! TWO_DIM-NEXT: %[[DIMS0:.*]]:3 = fir.box_dims %[[PRIV_ARG]], %[[C0]] : ([[TYPE]], index) -> (index, index, index)
+
+! TWO_DIM-NEXT: %[[C1:.*]] = arith.constant 1 : index
+! TWO_DIM-NEXT: %[[DIMS1:.*]]:3 = fir.box_dims %[[PRIV_ARG]], %[[C1]] : ([[TYPE]], index) -> (index, index, index)
+
+! TWO_DIM-NEXT: %[[PRIV_ALLOCA:.*]] = fir.alloca !fir.array<{{\?}}x{{\?}}xi32>, %[[DIMS0]]#1, %[[DIMS1]]#1 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! TWO_DIM-NEXT: %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS0]]#0, %[[DIMS0]]#1, %[[DIMS1]]#0, %[[DIMS1]]#1 : (index, index, index, index) -> !fir.shapeshift<2>
+
+! TWO_DIM-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOCA]](%[[SHAPE_SHIFT]]) {uniq_name = "_QFdelayed_privatization_privateEvar1"}
+! TWO_DIM-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])
+
+! TWO_DIM-NEXT: } copy {
+! TWO_DIM-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+! TWO_DIM-NEXT: hlfir.assign %[[PRIV_ORIG_ARG]] to %[[PRIV_PRIV_ARG]] temporary_lhs
+! TWO_DIM-NEXT: omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
+! TWO_DIM-NEXT: }
>From 26b5485f6331568a899ed0c636457b24207c845f Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Wed, 13 Mar 2024 09:19:52 -0500
Subject: [PATCH 3/3] [flang][MLIR][OpenMP] Extend delayed privatization for
`CHARACTER`
Extends delayed privatization to support scalar `CHARACTER` variables.
`fir.unbox` is used to extract the relevant values from the privatized
value: its `!fir.ref<!fir.char<1,?>>` and length. The extracted values
are then used to reconstruct the variable through `hlfir.declare` to
which the symbol is mapped.
---
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 64 ++++++++++++-------
flang/lib/Lower/OpenMP/OpenMP.cpp | 21 ++++--
flang/lib/Optimizer/Builder/HLFIRTools.cpp | 1 +
.../OpenMP/delayed-privatization-array.f90 | 4 +-
.../delayed-privatization-character.f90 | 40 ++++++++++++
5 files changed, 101 insertions(+), 29 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-character.f90
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 2412c13e938075..a98c587db7aa3b 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -16,6 +16,7 @@
#include "flang/Lower/PFTBuilder.h"
#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Semantics/tools.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
@@ -378,19 +379,12 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
: mlir::omp::DataSharingClauseType::Private);
fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);
-
symTable->pushScope();
- // Populate the `alloc` region.
- {
- mlir::Region &allocRegion = result.getAllocRegion();
- mlir::Block *allocEntryBlock = firOpBuilder.createBlock(
- &allocRegion, /*insertPt=*/{}, symType, symLoc);
-
- firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
-
- fir::ExtendedValue localExV = symExV.match(
- [&](const fir::ArrayBoxValue &box) -> fir::ExtendedValue {
+ auto addSymbol = [&](mlir::Region ®ion, unsigned argIdx,
+ bool force = false) {
+ symExV.match(
+ [&](const fir::ArrayBoxValue &box) {
auto idxTy = firOpBuilder.getIndexType();
llvm::SmallVector<mlir::Value> extents;
llvm::SmallVector<mlir::Value> lBounds;
@@ -399,20 +393,46 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
mlir::Value dimVal =
firOpBuilder.createIntegerConstant(symLoc, idxTy, dim);
fir::BoxDimsOp dimInfo = firOpBuilder.create<fir::BoxDimsOp>(
- symLoc, idxTy, idxTy, idxTy, allocRegion.getArgument(0),
+ symLoc, idxTy, idxTy, idxTy, region.getArgument(argIdx),
dimVal);
extents.push_back(dimInfo.getExtent());
lBounds.push_back(dimInfo.getLowerBound());
}
- return fir::ArrayBoxValue(allocRegion.getArgument(0), extents,
- lBounds);
+ symTable->addSymbol(*sym,
+ fir::ArrayBoxValue(region.getArgument(argIdx),
+ extents, lBounds),
+ force);
},
- [&](const auto &box) -> fir::ExtendedValue {
- return fir::substBase(symExV, allocRegion.getArgument(0));
+ [&](const fir::CharBoxValue &box) {
+ fir::BoxCharType boxCharType = symType.cast<fir::BoxCharType>();
+ mlir::Type charRefType =
+ firOpBuilder.getRefType(boxCharType.getEleTy());
+
+ fir::UnboxCharOp unboxedArg = firOpBuilder.create<fir::UnboxCharOp>(
+ symLoc, charRefType, firOpBuilder.getCharacterLengthType(),
+ region.getArgument(argIdx));
+ hlfir::DeclareOp localVar = firOpBuilder.create<hlfir::DeclareOp>(
+ symLoc, unboxedArg.getResult(0), converter.mangleName(*sym),
+ nullptr,
+ llvm::SmallVector<mlir::Value>{unboxedArg.getResult(1)});
+ symTable->addVariableDefinition(*sym, localVar, force);
+ },
+ [&](const auto &box) {
+ symTable->addSymbol(
+ *sym, fir::substBase(box, region.getArgument(argIdx)), force);
});
+ };
- symTable->addSymbol(*sym, localExV);
+ // Populate the `alloc` region.
+ {
+ mlir::Region &allocRegion = result.getAllocRegion();
+ mlir::Block *allocEntryBlock = firOpBuilder.createBlock(
+ &allocRegion, /*insertPt=*/{}, symType, symLoc);
+
+ firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
+
+ addSymbol(allocRegion, 0);
symTable->pushScope();
cloneSymbol(sym);
firOpBuilder.create<mlir::omp::YieldOp>(
@@ -429,12 +449,12 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
©Region, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
- symTable->addSymbol(*sym,
- fir::substBase(symExV, copyRegion.getArgument(0)),
- /*force=*/true);
+
+
+
+ addSymbol(copyRegion, 0, true);
symTable->pushScope();
- symTable->addSymbol(*sym,
- fir::substBase(symExV, copyRegion.getArgument(1)));
+ addSymbol(copyRegion, 1);
auto ip = firOpBuilder.saveInsertionPoint();
copyFirstPrivateSymbol(sym, &ip);
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 25bb4d9cff5d16..9f07eb922876ce 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -655,20 +655,31 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
llvm::transform(privateVars, std::back_inserter(privateVarLocs),
[](mlir::Value v) { return v.getLoc(); });
- converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{},
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ builder.createBlock(®ion, /*insertPt=*/{},
privateVarTypes, privateVarLocs);
llvm::SmallVector<const Fortran::semantics::Symbol *> allSymbols =
reductionSymbols;
allSymbols.append(delayedPrivatizationInfo.symbols);
- for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
- converter.bindSymbol(*arg, prv);
- }
+ for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments()))
+ if (fir::BoxCharType boxCharTy =
+ prv.getType().dyn_cast<fir::BoxCharType>()) {
+ mlir::Type charRefType = builder.getRefType(boxCharTy.getEleTy());
+
+ fir::UnboxCharOp unboxedArg = builder.create<fir::UnboxCharOp>(
+ builder.getUnknownLoc(), charRefType,
+ builder.getCharacterLengthType(), prv);
+
+ fir::CharBoxValue newBox(unboxedArg.getResult(0),
+ unboxedArg.getResult(1));
+ converter.bindSymbol(*arg, newBox);
+ } else
+ converter.bindSymbol(*arg, prv);
return allSymbols;
};
- // TODO Merge with the reduction CB.
genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp);
llvm::SmallVector<mlir::Attribute> privatizers(
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index c7a550814e1d58..3651f53c8fdc53 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -897,6 +897,7 @@ translateVariableToExtendedValue(mlir::Location loc, fir::FirOpBuilder &builder,
return fir::CharArrayBoxValue{
base, genCharacterVariableLength(loc, builder, variable), extents,
nonDefaultLbounds};
+
return fir::ArrayBoxValue{base, extents, nonDefaultLbounds};
}
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-array.f90 b/flang/test/Lower/OpenMP/delayed-privatization-array.f90
index 23b61563781eb1..f2de4edc24ce48 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-array.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-array.f90
@@ -37,7 +37,7 @@ subroutine delayed_privatization_private(var1, l1, u1)
! ONE_DIM-NEXT: } copy {
! ONE_DIM-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
-! ONE_DIM-NEXT: hlfir.assign %[[PRIV_ORIG_ARG]] to %[[PRIV_PRIV_ARG]] temporary_lhs
+! ONE_DIM: hlfir.assign %[[PRIV_ORIG_ARG]] to %[[PRIV_PRIV_ARG]] temporary_lhs
! ONE_DIM-NEXT: omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
! ONE_DIM-NEXT: }
@@ -70,6 +70,6 @@ subroutine delayed_privatization_private(var1, l1, u1, l2, u2)
! TWO_DIM-NEXT: } copy {
! TWO_DIM-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
-! TWO_DIM-NEXT: hlfir.assign %[[PRIV_ORIG_ARG]] to %[[PRIV_PRIV_ARG]] temporary_lhs
+! TWO_DIM: hlfir.assign %[[PRIV_ORIG_ARG]] to %[[PRIV_PRIV_ARG]] temporary_lhs
! TWO_DIM-NEXT: omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
! TWO_DIM-NEXT: }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-character.f90 b/flang/test/Lower/OpenMP/delayed-privatization-character.f90
new file mode 100644
index 00000000000000..756f2bb33f84b1
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-character.f90
@@ -0,0 +1,40 @@
+! Test delayed privatization for the `CHARACTER` type.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
+! RUN: -o - %s 2>&1 | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 \
+! RUN: | FileCheck %s
+
+subroutine delayed_privatization_character(var1, l)
+ implicit none
+ integer(8):: l
+ character(len = l) :: var1
+
+!$omp parallel firstprivate(var1)
+ var1 = "test"
+!$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = firstprivate}
+! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.boxchar<1>]] alloc {
+
+! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
+! CHECK-NEXT: %[[UNBOX:.*]]:2 = fir.unboxchar %[[PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_ARG_DECL:.*]]:2 = hlfir.declare %[[UNBOX]]#0 typeparams %[[UNBOX]]#1
+! CHECK: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.char<1,?>(%[[UNBOX]]#1 : index)
+! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]] typeparams %[[UNBOX]]#1
+! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : !fir.boxchar<1>)
+
+! CHECK-NEXT: } copy {
+! CHECK-NEXT: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
+
+! CHECK-NEXT: %[[ORIG_UNBOX:.*]]:2 = fir.unboxchar %[[PRIV_ORIG_ARG]]
+! CHECK-NEXT: %[[ORIG_DECL:.*]]:2 = hlfir.declare %[[ORIG_UNBOX]]#0 typeparams %[[ORIG_UNBOX]]#1
+
+! CHECK-NEXT: %[[PRIV_UNBOX:.*]]:2 = fir.unboxchar %[[PRIV_PRIV_ARG]]
+! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_UNBOX]]#0 typeparams %[[PRIV_UNBOX]]#1
+
+! CHECK-NEXT: hlfir.assign %[[ORIG_DECL]]#0 to %[[PRIV_DECL]]#0
+
+! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : !fir.boxchar<1>)
+! CHECK-NEXT: }
More information about the flang-commits
mailing list