[flang-commits] [flang] [flang][Lower][OpenMP] Don't read moldarg for static sized array #125901 (PR #127838)

via flang-commits flang-commits at lists.llvm.org
Wed Feb 19 10:06:43 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-openmp

@llvm/pr-subscribers-flang-fir-hlfir

Author: Tom Eccles (tblah)

<details>
<summary>Changes</summary>

This should further reduce the number of spurious barriers

----

This PR is to re-land #<!-- -->125901 after it had to be reverted due to a testsuite failure after applying to the main branch. The bugfix is in 85e6ba8668cb0619638b7c8c9f2da360707c6b15.

---
Full diff: https://github.com/llvm/llvm-project/pull/127838.diff


5 Files Affected:

- (modified) flang/lib/Lower/OpenMP/DataSharingProcessor.cpp (+3-2) 
- (modified) flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp (+41-20) 
- (modified) flang/lib/Lower/OpenMP/PrivateReductionUtils.h (+4-2) 
- (modified) flang/test/Lower/OpenMP/delayed-privatization-array.f90 (+3-4) 
- (modified) flang/test/Lower/OpenMP/different_vars_lastprivate_barrier.f90 (+6-5) 


``````````diff
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index d13f101f516e7..d725dfd3e94f3 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -508,6 +508,8 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
 
   lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
   assert(hsb && "Host symbol box not found");
+  hlfir::Entity entity{hsb.getAddr()};
+  bool cannotHaveNonDefaultLowerBounds = !entity.mayHaveNonDefaultLowerBounds();
 
   mlir::Location symLoc = hsb.getAddr().getLoc();
   std::string privatizerName = sym->name().ToString() + ".privatizer";
@@ -528,7 +530,6 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
   // an alloca for a fir.array type there. Get around this by boxing all
   // arrays.
   if (mlir::isa<fir::SequenceType>(allocType)) {
-    hlfir::Entity entity{hsb.getAddr()};
     entity = genVariableBox(symLoc, firOpBuilder, entity);
     privVal = entity.getBase();
     allocType = privVal.getType();
@@ -590,7 +591,7 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
           result.getDeallocRegion(),
           isFirstPrivate ? DeclOperationKind::FirstPrivate
                          : DeclOperationKind::Private,
-          sym);
+          sym, cannotHaveNonDefaultLowerBounds);
       // TODO: currently there are false positives from dead uses of the mold
       // arg
       if (!result.getInitMoldArg().getUses().empty())
diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp
index 22cd0679050db..21ade77d82d37 100644
--- a/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp
+++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.cpp
@@ -122,25 +122,40 @@ static void createCleanupRegion(Fortran::lower::AbstractConverter &converter,
   typeError();
 }
 
-fir::ShapeShiftOp Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder,
-                                                     mlir::Location loc,
-                                                     mlir::Value box) {
+fir::ShapeShiftOp
+Fortran::lower::omp::getShapeShift(fir::FirOpBuilder &builder,
+                                   mlir::Location loc, mlir::Value box,
+                                   bool cannotHaveNonDefaultLowerBounds) {
   fir::SequenceType sequenceType = mlir::cast<fir::SequenceType>(
       hlfir::getFortranElementOrSequenceType(box.getType()));
   const unsigned rank = sequenceType.getDimension();
+
   llvm::SmallVector<mlir::Value> lbAndExtents;
   lbAndExtents.reserve(rank * 2);
-
   mlir::Type idxTy = builder.getIndexType();
-  for (unsigned i = 0; i < rank; ++i) {
-    // TODO: ideally we want to hoist box reads out of the critical section.
-    // We could do this by having box dimensions in block arguments like
-    // OpenACC does
-    mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
-    auto dimInfo =
-        builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim);
-    lbAndExtents.push_back(dimInfo.getLowerBound());
-    lbAndExtents.push_back(dimInfo.getExtent());
+
+  if (cannotHaveNonDefaultLowerBounds && !sequenceType.hasDynamicExtents()) {
+    // We don't need fir::BoxDimsOp if all of the extents are statically known
+    // and we can assume default lower bounds. This helps avoids reads from the
+    // mold arg.
+    mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+    for (int64_t extent : sequenceType.getShape()) {
+      assert(extent != sequenceType.getUnknownExtent());
+      mlir::Value extentVal = builder.createIntegerConstant(loc, idxTy, extent);
+      lbAndExtents.push_back(one);
+      lbAndExtents.push_back(extentVal);
+    }
+  } else {
+    for (unsigned i = 0; i < rank; ++i) {
+      // TODO: ideally we want to hoist box reads out of the critical section.
+      // We could do this by having box dimensions in block arguments like
+      // OpenACC does
+      mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i);
+      auto dimInfo =
+          builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim);
+      lbAndExtents.push_back(dimInfo.getLowerBound());
+      lbAndExtents.push_back(dimInfo.getExtent());
+    }
   }
 
   auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank);
@@ -248,12 +263,13 @@ class PopulateInitAndCleanupRegionsHelper {
       mlir::Type argType, mlir::Value scalarInitValue,
       mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
       mlir::Block *initBlock, mlir::Region &cleanupRegion,
-      DeclOperationKind kind, const Fortran::semantics::Symbol *sym)
+      DeclOperationKind kind, const Fortran::semantics::Symbol *sym,
+      bool cannotHaveLowerBounds)
       : converter{converter}, builder{converter.getFirOpBuilder()}, loc{loc},
         argType{argType}, scalarInitValue{scalarInitValue},
         allocatedPrivVarArg{allocatedPrivVarArg}, moldArg{moldArg},
         initBlock{initBlock}, cleanupRegion{cleanupRegion}, kind{kind},
-        sym{sym} {
+        sym{sym}, cannotHaveNonDefaultLowerBounds{cannotHaveLowerBounds} {
     valType = fir::unwrapRefType(argType);
   }
 
@@ -295,6 +311,10 @@ class PopulateInitAndCleanupRegionsHelper {
   /// Any length parameters which have been fetched for the type
   mlir::SmallVector<mlir::Value> lenParams;
 
+  /// If the source variable being privatized definitely can't have non-default
+  /// lower bounds then we don't need to generate code to read them.
+  bool cannotHaveNonDefaultLowerBounds;
+
   void createYield(mlir::Value ret) {
     builder.create<mlir::omp::YieldOp>(loc, ret);
   }
@@ -432,7 +452,8 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
   // Special case for (possibly allocatable) arrays of polymorphic types
   // e.g. !fir.class<!fir.heap<!fir.array<?x!fir.type<>>>>
   if (source.isPolymorphic()) {
-    fir::ShapeShiftOp shape = getShapeShift(builder, loc, source);
+    fir::ShapeShiftOp shape =
+        getShapeShift(builder, loc, source, cannotHaveNonDefaultLowerBounds);
     mlir::Type arrayType = source.getElementOrSequenceType();
     mlir::Value allocatedArray = builder.create<fir::AllocMemOp>(
         loc, arrayType, /*typeparams=*/mlir::ValueRange{}, shape.getExtents());
@@ -471,8 +492,8 @@ void PopulateInitAndCleanupRegionsHelper::initAndCleanupBoxedArray(
   // Put the temporary inside of a box:
   // hlfir::genVariableBox doesn't handle non-default lower bounds
   mlir::Value box;
-  fir::ShapeShiftOp shapeShift =
-      getShapeShift(builder, loc, getLoadedMoldArg());
+  fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, getLoadedMoldArg(),
+                                               cannotHaveNonDefaultLowerBounds);
   mlir::Type boxType = getLoadedMoldArg().getType();
   if (mlir::isa<fir::BaseBoxType>(temp.getType()))
     // the box created by the declare form createTempFromMold is missing
@@ -607,10 +628,10 @@ void Fortran::lower::omp::populateByRefInitAndCleanupRegions(
     mlir::Type argType, mlir::Value scalarInitValue, mlir::Block *initBlock,
     mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
     mlir::Region &cleanupRegion, DeclOperationKind kind,
-    const Fortran::semantics::Symbol *sym) {
+    const Fortran::semantics::Symbol *sym, bool cannotHaveLowerBounds) {
   PopulateInitAndCleanupRegionsHelper helper(
       converter, loc, argType, scalarInitValue, allocatedPrivVarArg, moldArg,
-      initBlock, cleanupRegion, kind, sym);
+      initBlock, cleanupRegion, kind, sym, cannotHaveLowerBounds);
   helper.populateByRefInitAndCleanupRegions();
 
   // Often we load moldArg to check something (e.g. length parameters, shape)
diff --git a/flang/lib/Lower/OpenMP/PrivateReductionUtils.h b/flang/lib/Lower/OpenMP/PrivateReductionUtils.h
index fcd36392a29e0..0a3513bff19b0 100644
--- a/flang/lib/Lower/OpenMP/PrivateReductionUtils.h
+++ b/flang/lib/Lower/OpenMP/PrivateReductionUtils.h
@@ -55,11 +55,13 @@ void populateByRefInitAndCleanupRegions(
     mlir::Value scalarInitValue, mlir::Block *initBlock,
     mlir::Value allocatedPrivVarArg, mlir::Value moldArg,
     mlir::Region &cleanupRegion, DeclOperationKind kind,
-    const Fortran::semantics::Symbol *sym = nullptr);
+    const Fortran::semantics::Symbol *sym = nullptr,
+    bool cannotHaveNonDefaultLowerBounds = false);
 
 /// Generate a fir::ShapeShift op describing the provided boxed array.
 fir::ShapeShiftOp getShapeShift(fir::FirOpBuilder &builder, mlir::Location loc,
-                                mlir::Value box);
+                                mlir::Value box,
+                                bool cannotHaveNonDefaultLowerBounds = false);
 
 } // namespace omp
 } // namespace lower
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-array.f90 b/flang/test/Lower/OpenMP/delayed-privatization-array.f90
index 95fa3f9e03052..c447fa6f27a75 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-array.f90
+++ b/flang/test/Lower/OpenMP/delayed-privatization-array.f90
@@ -108,15 +108,14 @@ program main
 ! ONE_DIM_DEFAULT_LB-SAME: @[[PRIVATIZER_SYM:.*]] : [[BOX_TYPE:!fir.box<!fir.array<10xi32>>]] init {
 
 ! ONE_DIM_DEFAULT_LB-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE:!fir.ref<!fir.box<!fir.array<10xi32>>>]], %[[PRIV_BOX_ALLOC:.*]]: [[TYPE]]):
-! ONE_DIM_DEFAULT_LB-NEXT:   %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]]
 ! ONE_DIM_DEFAULT_LB-NEXT:   %[[C10:.*]] = arith.constant 10 : index
 ! ONE_DIM_DEFAULT_LB-NEXT:   %[[SHAPE:.*]] = fir.shape %[[C10]]
 ! ONE_DIM_DEFAULT_LB-NEXT:   %[[ARRAY_ALLOC:.*]] = fir.allocmem !fir.array<10xi32>
 ! ONE_DIM_DEFAULT_LB-NEXT:   %[[TRUE:.*]] = arith.constant true
 ! ONE_DIM_DEFAULT_LB-NEXT:   %[[DECL:.*]]:2 = hlfir.declare %[[ARRAY_ALLOC]](%[[SHAPE]])
-! ONE_DIM_DEFAULT_LB-NEXT:   %[[C0_0:.*]] = arith.constant 0
-! ONE_DIM_DEFAULT_LB-NEXT:   %[[DIMS2:.*]]:3 = fir.box_dims %[[PRIV_ARG_VAL]], %[[C0_0]]
-! ONE_DIM_DEFAULT_LB-NEXT:   %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[DIMS2]]#0, %[[DIMS2]]#1
+! ONE_DIM_DEFAULT_LB-NEXT:   %[[ONE:.*]] = arith.constant 1 : index
+! ONE_DIM_DEFAULT_LB-NEXT:   %[[TEN:.*]] = arith.constant 10 : index
+! ONE_DIM_DEFAULT_LB-NEXT:   %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[ONE]], %[[TEN]]
 ! ONE_DIM_DEFAULT_LB-NEXT:   %[[EMBOX:.*]] = fir.embox %[[DECL]]#0(%[[SHAPE_SHIFT]])
 ! ONE_DIM_DEFAULT_LB-NEXT:   fir.store %[[EMBOX]] to %[[PRIV_BOX_ALLOC]]
 ! ONE_DIM_DEFAULT_LB-NEXT:   omp.yield(%[[PRIV_BOX_ALLOC]] : [[TYPE]])
diff --git a/flang/test/Lower/OpenMP/different_vars_lastprivate_barrier.f90 b/flang/test/Lower/OpenMP/different_vars_lastprivate_barrier.f90
index 0f04977754291..b74e083925aba 100644
--- a/flang/test/Lower/OpenMP/different_vars_lastprivate_barrier.f90
+++ b/flang/test/Lower/OpenMP/different_vars_lastprivate_barrier.f90
@@ -1,8 +1,8 @@
 ! RUN: %flang_fc1 -fopenmp -mmlir --openmp-enable-delayed-privatization-staging=true -emit-hlfir %s -o - | FileCheck %s
 
-subroutine first_and_lastprivate
+subroutine first_and_lastprivate(var)
   integer i
-  integer, dimension(3) :: var
+  integer, dimension(:) :: var
 
   !$omp parallel do lastprivate(i) private(var)
   do i=1,1
@@ -10,19 +10,20 @@ subroutine first_and_lastprivate
   !$omp end parallel do
 end subroutine
 
-! CHECK:      omp.private {type = private} @[[VAR_PRIVATIZER:.*Evar_private_box_3xi32]] : [[BOX_TYPE:!fir\.box<!fir\.array<3xi32>>]] init {
+! CHECK:      omp.private {type = private} @[[VAR_PRIVATIZER:.*Evar_private_box_Uxi32]] : [[BOX_TYPE:!fir\.box<!fir\.array<\?xi32>>]] init {
 ! CHECK-NEXT: ^bb0(%[[ORIG_REF:.*]]: {{.*}}, %[[PRIV_REF:.*]]: {{.*}}):
 ! CHECK:        %[[ORIG_VAL:.*]] = fir.load %[[ORIG_REF]]
+! CHECK:        %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[ORIG_VAL]], %{{.*}} : ([[BOX_TYPE]], index) -> (index, index, index)
 ! CHECK:        %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ORIG_VAL]], %{{.*}} : ([[BOX_TYPE]], index) -> (index, index, index)
 ! CHECK:        %[[SHAPE_SHIFT:.*]] = fir.shape_shift %[[BOX_DIMS]]#0, %[[BOX_DIMS]]#1
-! CHECK:        %[[EMBOX:.*]] = fir.embox %{{.*}}(%[[SHAPE_SHIFT]]) : {{.*}} -> [[BOX_TYPE]]
+! CHECK:        %[[EMBOX:.*]] = fir.rebox %{{.*}}(%[[SHAPE_SHIFT]]) : {{.*}} -> [[BOX_TYPE]]
 ! CHECK:        fir.store %[[EMBOX]] to %[[PRIV_REF]]
 ! CHECK:        omp.yield(%[[PRIV_REF]] : !fir.ref<[[BOX_TYPE]]>)
 ! CHECK:      }
 
 ! CHECK: omp.private {type = private} @[[I_PRIVATIZER:.*Ei_private_i32]] : i32
 
-! CHECK:     func.func @{{.*}}first_and_lastprivate()
+! CHECK:     func.func @{{.*}}first_and_lastprivate({{.*}})
 ! CHECK:       %[[ORIG_I_DECL:.*]]:2 = hlfir.declare {{.*}} {uniq_name = "{{.*}}Ei"}
 ! CHECK:       omp.parallel {
 ! CHECK-NOT:      omp.barrier

``````````

</details>


https://github.com/llvm/llvm-project/pull/127838


More information about the flang-commits mailing list