[flang-commits] [flang] [flang] Code generation for fir.pack/unpack_array. (PR #132080)
via flang-commits
flang-commits at lists.llvm.org
Wed Mar 19 11:42:46 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-driver
Author: Slava Zakharin (vzakhari)
<details>
<summary>Changes</summary>
The code generation relies on `ShallowCopyDirect` runtime
to copy data between the original and the temporary arrays
(both directions). The allocations are done by the compiler
generated code. The heap allocations could have been passed
to `ShallowCopy` runtime, but I decided to expose the allocations
so that the temporary descriptor passed to `ShallowCopyDirect`
has `nocapture` - maybe this will be better for LLVM optimizations.
---
Patch is 125.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132080.diff
10 Files Affected:
- (modified) flang/include/flang/Optimizer/CodeGen/CGPasses.td (+11)
- (modified) flang/include/flang/Optimizer/CodeGen/CodeGen.h (+1)
- (modified) flang/lib/Optimizer/CodeGen/CMakeLists.txt (+1)
- (added) flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp (+403)
- (modified) flang/lib/Optimizer/Passes/Pipelines.cpp (+1)
- (modified) flang/test/Driver/bbc-mlir-pass-pipeline.f90 (+1)
- (modified) flang/test/Driver/mlir-debug-pass-pipeline.f90 (+1)
- (modified) flang/test/Driver/mlir-pass-pipeline.f90 (+1)
- (modified) flang/test/Fir/basic-program.fir (+1)
- (added) flang/test/Transforms/lower-repack-arrays.fir (+1124)
``````````diff
diff --git a/flang/include/flang/Optimizer/CodeGen/CGPasses.td b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
index 2e097faec5403..df0ecf5540776 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGPasses.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
@@ -99,4 +99,15 @@ def BoxedProcedurePass : Pass<"boxed-procedure", "mlir::ModuleOp"> {
];
}
+def LowerRepackArraysPass : Pass<"lower-repack-arrays", "mlir::ModuleOp"> {
+ let summary = "Convert fir.pack/unpack_array to other FIR operations";
+ let description = [{
+ Convert fir.pack/unpack_array operations to other FIR operations
+ and Fortran runtime calls that implement the semantics
+ of packing/unpacking.
+ }];
+ let dependentDialects = ["fir::FIROpsDialect", "mlir::arith::ArithDialect",
+ "mlir::func::FuncDialect"];
+}
+
#endif // FORTRAN_OPTIMIZER_CODEGEN_FIR_PASSES
diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
index 255b1950c8425..0398d0f248e08 100644
--- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h
+++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
@@ -26,6 +26,7 @@ struct NameUniquer;
#define GEN_PASS_DECL_CODEGENREWRITE
#define GEN_PASS_DECL_TARGETREWRITEPASS
#define GEN_PASS_DECL_BOXEDPROCEDUREPASS
+#define GEN_PASS_DECL_LOWERREPACKARRAYSPASS
#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
/// FIR to LLVM translation pass options.
diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
index 553c20bb85d38..f730c7fd03948 100644
--- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt
+++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
@@ -4,6 +4,7 @@ add_flang_library(FIRCodeGen
CodeGen.cpp
CodeGenOpenMP.cpp
FIROpPatterns.cpp
+ LowerRepackArrays.cpp
PreCGRewrite.cpp
TBAABuilder.cpp
Target.cpp
diff --git a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
new file mode 100644
index 0000000000000..c109dc4732ca5
--- /dev/null
+++ b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
@@ -0,0 +1,403 @@
+//===-- LowerRepackArrays.cpp
+//------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/CodeGen/CodeGen.h"
+
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/Character.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Builder/Runtime/Allocatable.h"
+#include "flang/Optimizer/Builder/Runtime/Transformational.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Support/DataLayout.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+namespace fir {
+#define GEN_PASS_DEF_LOWERREPACKARRAYSPASS
+#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "lower-repack-arrays"
+
+namespace {
+class RepackArrayConversion {
+public:
+ RepackArrayConversion(std::optional<mlir::DataLayout> dataLayout)
+ : dataLayout(dataLayout) {}
+
+protected:
+ std::optional<mlir::DataLayout> dataLayout;
+
+ static bool canAllocateTempOnStack(mlir::Value box);
+};
+
+class PackArrayConversion : public mlir::OpRewritePattern<fir::PackArrayOp>,
+ RepackArrayConversion {
+public:
+ using OpRewritePattern::OpRewritePattern;
+
+ PackArrayConversion(mlir::MLIRContext *context,
+ std::optional<mlir::DataLayout> dataLayout)
+ : OpRewritePattern(context), RepackArrayConversion(dataLayout) {}
+
+ mlir::LogicalResult
+ matchAndRewrite(fir::PackArrayOp op,
+ mlir::PatternRewriter &rewriter) const override;
+
+private:
+ static constexpr llvm::StringRef bufferName = ".repacked";
+
+ static mlir::Value allocateTempBuffer(fir::FirOpBuilder &builder,
+ mlir::Location loc, bool useStack,
+ mlir::Value origBox,
+ llvm::ArrayRef<mlir::Value> extents,
+ llvm::ArrayRef<mlir::Value> typeParams);
+};
+
+class UnpackArrayConversion : public mlir::OpRewritePattern<fir::UnpackArrayOp>,
+ RepackArrayConversion {
+public:
+ using OpRewritePattern::OpRewritePattern;
+
+ UnpackArrayConversion(mlir::MLIRContext *context,
+ std::optional<mlir::DataLayout> dataLayout)
+ : OpRewritePattern(context), RepackArrayConversion(dataLayout) {}
+
+ mlir::LogicalResult
+ matchAndRewrite(fir::UnpackArrayOp op,
+ mlir::PatternRewriter &rewriter) const override;
+};
+} // anonymous namespace
+
+bool RepackArrayConversion::canAllocateTempOnStack(mlir::Value box) {
+ return !fir::isPolymorphicType(box.getType());
+}
+
+mlir::LogicalResult
+PackArrayConversion::matchAndRewrite(fir::PackArrayOp op,
+ mlir::PatternRewriter &rewriter) const {
+ mlir::Location loc = op.getLoc();
+ fir::FirOpBuilder builder(rewriter, op.getOperation());
+ if (op.getMaxSize() || op.getMaxElementSize() || op.getMinStride())
+ TODO(loc, "fir.pack_array with constraints");
+ if (op.getHeuristics() != fir::PackArrayHeuristics::None)
+ TODO(loc, "fir.pack_array with heuristics");
+
+ mlir::Value box = op.getArray();
+ llvm::SmallVector<mlir::Value> typeParams(op.getTypeparams().begin(),
+ op.getTypeparams().end());
+ // TODO: set non-default lower bounds on fir.pack_array,
+ // so that we can preserve lower bounds in the temporary box.
+ fir::BoxValue boxValue(box, /*lbounds=*/{}, typeParams);
+ mlir::Type boxType = boxValue.getBoxTy();
+ unsigned rank = boxValue.rank();
+ mlir::Type indexType = builder.getIndexType();
+ mlir::Value zero = fir::factory::createZeroValue(builder, loc, indexType);
+
+ // Fetch the extents from the box, and see if the array
+ // is not empty.
+ // If the type params are not explicitly provided, then we must also
+ // fetch the type parameters from the box.
+ //
+ // bool isNotEmpty;
+ // vector<int64_t> extents;
+ // if (IsPresent(box) && !IsContiguous[UpTo](box[, 1])) {
+ // isNotEmpty = box->base_addr != null;
+ // extents = SHAPE(box);
+ // } else {
+ // isNotEmpty = false;
+ // extents = vector<int64_t>(rank, 0);
+ // }
+
+ unsigned numTypeParams = 0;
+ if (typeParams.size() == 0) {
+ if (auto recordType = mlir::dyn_cast<fir::RecordType>(boxValue.getEleTy()))
+ if (recordType.getNumLenParams() != 0)
+ TODO(loc,
+ "allocating temporary for a parameterized derived type array");
+
+ if (auto charType = mlir::dyn_cast<fir::CharacterType>(boxValue.getEleTy()))
+ if (charType.hasDynamicLen())
+ numTypeParams = 1;
+ }
+
+ // For now we have to always check if the box is present.
+ mlir::Type predicateType = builder.getI1Type();
+ auto isPresent =
+ builder.create<fir::IsPresentOp>(loc, predicateType, boxValue.getAddr());
+
+ // The results of the IfOp are:
+ // (extent1, ..., extentN, typeParam1, ..., typeParamM, isNotEmpty)
+ // The number of results is rank + numTypeParams + 1.
+ llvm::SmallVector<mlir::Type> ifTypes(rank + numTypeParams, indexType);
+ ifTypes.push_back(predicateType);
+ llvm::SmallVector<mlir::Value> negativeResult(rank + numTypeParams, zero);
+ negativeResult.push_back(
+ fir::factory::createZeroValue(builder, loc, predicateType));
+ bool failedTypeParams = false;
+ llvm::SmallVector<mlir::Value> extentsAndPredicate =
+ builder
+ .genIfOp(loc, ifTypes, isPresent,
+ /*withElseRegion=*/true)
+ .genThen([&]() {
+ // The box is present.
+ auto isContiguous = builder.create<fir::IsContiguousBoxOp>(
+ loc, box, op.getInnermost());
+ llvm::SmallVector<mlir::Value> extentsAndPredicate =
+ builder
+ .genIfOp(loc, ifTypes, isContiguous,
+ /*withElseRegion=*/true)
+ .genThen([&]() {
+ // Box is contiguous, return zero.
+ builder.create<fir::ResultOp>(loc, negativeResult);
+ })
+ .genElse([&]() {
+ // Get the extents.
+ llvm::SmallVector<mlir::Value> results =
+ fir::factory::readExtents(builder, loc, boxValue);
+
+ // Get the type parameters from the box, if needed.
+ llvm::SmallVector<mlir::Value> assumedTypeParams;
+ if (numTypeParams != 0) {
+ if (auto charType = mlir::dyn_cast<fir::CharacterType>(
+ boxValue.getEleTy()))
+ if (charType.hasDynamicLen()) {
+ fir::factory::CharacterExprHelper charHelper(
+ builder, loc);
+ mlir::Value len = charHelper.readLengthFromBox(
+ boxValue.getAddr(), charType);
+ assumedTypeParams.push_back(
+ builder.createConvert(loc, indexType, len));
+ }
+
+ if (numTypeParams != assumedTypeParams.size()) {
+ failedTypeParams = true;
+ assumedTypeParams.append(
+ numTypeParams - assumedTypeParams.size(), zero);
+ }
+ }
+ results.append(assumedTypeParams);
+
+ auto dataAddr = builder.create<fir::BoxAddrOp>(
+ loc, boxValue.getMemTy(), boxValue.getAddr());
+ auto isNotEmpty = builder.create<fir::IsPresentOp>(
+ loc, predicateType, dataAddr);
+ results.push_back(isNotEmpty);
+ builder.create<fir::ResultOp>(loc, results);
+ })
+ .getResults();
+
+ builder.create<fir::ResultOp>(loc, extentsAndPredicate);
+ })
+ .genElse([&]() {
+ // Box is absent, nothing to do.
+ builder.create<fir::ResultOp>(loc, negativeResult);
+ })
+ .getResults();
+
+ if (failedTypeParams)
+ return emitError(loc) << "failed to compute the type parameters for "
+ << op.getOperation() << '\n';
+
+ // The last result is the isNotEmpty predicate value.
+ mlir::Value isNotEmpty = extentsAndPredicate.pop_back_val();
+ // If fir.pack_array does not specify type parameters, but they are needed
+ // for the type, then use the parameters fetched from the box.
+ if (typeParams.size() == 0 && numTypeParams != 0) {
+ assert(extentsAndPredicate.size() > numTypeParams);
+ typeParams.append(extentsAndPredicate.end() - numTypeParams,
+ extentsAndPredicate.end());
+ extentsAndPredicate.pop_back_n(numTypeParams);
+ }
+ // The remaining resulst are the extents.
+ llvm::SmallVector<mlir::Value> extents = std::move(extentsAndPredicate);
+ assert(extents.size() == rank);
+
+ mlir::Value tempBox;
+ // Allocate memory for the temporary, if allocating on stack.
+ // We can do it unconditionally, even if size is zero.
+ if (op.getStack() && canAllocateTempOnStack(boxValue.getAddr())) {
+ tempBox = allocateTempBuffer(builder, loc, /*useStack=*/true,
+ boxValue.getAddr(), extents, typeParams);
+ if (!tempBox)
+ return rewriter.notifyMatchFailure(op,
+ "failed to produce stack allocation");
+ }
+
+ mlir::Value newResult =
+ builder.genIfOp(loc, {boxType}, isNotEmpty, /*withElseRegion=*/true)
+ .genThen([&]() {
+ // Do the heap allocation conditionally.
+ if (!tempBox)
+ tempBox =
+ allocateTempBuffer(builder, loc, /*useStack=*/false,
+ boxValue.getAddr(), extents, typeParams);
+
+ // Do the copy, if needed, and return the new box (shaped same way
+ // as the original one).
+ if (!op.getNoCopy())
+ fir::runtime::genShallowCopy(builder, loc, tempBox,
+ boxValue.getAddr(),
+ /*resultIsAllocated=*/true);
+
+ // Set the lower bounds after the original box.
+ mlir::Value shape;
+ if (!boxValue.getLBounds().empty()) {
+ shape = builder.genShape(loc, boxValue.getLBounds(), extents);
+ }
+
+ // Rebox the temporary box to make its type the same as
+ // the original box's.
+ tempBox = builder.create<fir::ReboxOp>(loc, boxType, tempBox, shape,
+ /*slice=*/nullptr);
+ builder.create<fir::ResultOp>(loc, tempBox);
+ })
+ .genElse([&]() {
+ // Return original box.
+ builder.create<fir::ResultOp>(loc, boxValue.getAddr());
+ })
+ .getResults()[0];
+
+ rewriter.replaceOp(op, newResult);
+ return mlir::success();
+}
+
+mlir::Value PackArrayConversion::allocateTempBuffer(
+ fir::FirOpBuilder &builder, mlir::Location loc, bool useStack,
+ mlir::Value origBox, llvm::ArrayRef<mlir::Value> extents,
+ llvm::ArrayRef<mlir::Value> typeParams) {
+ auto tempType = mlir::cast<fir::SequenceType>(
+ fir::extractSequenceType(origBox.getType()));
+ assert(tempType.getDimension() == extents.size() &&
+ "number of extents does not match the rank");
+
+ if (fir::isPolymorphicType(origBox.getType())) {
+ // Use runtime to allocate polymorphic temporary vector using the dynamic
+ // type of the original box and the provided numElements.
+ // TODO: try to generalize it with BufferizeHLFIR.cpp:createArrayTemp().
+
+ // We cannot allocate polymorphic entity on stack.
+ // Return null, and allow the caller to reissue the call.
+ if (useStack)
+ return nullptr;
+
+ mlir::Type indexType = builder.getIndexType();
+ mlir::Type boxHeapType = fir::HeapType::get(tempType);
+ mlir::Value boxAlloc = fir::factory::genNullBoxStorage(
+ builder, loc, fir::ClassType::get(boxHeapType));
+ fir::runtime::genAllocatableApplyMold(builder, loc, boxAlloc, origBox,
+ tempType.getDimension());
+ mlir::Value one = builder.createIntegerConstant(loc, indexType, 1);
+ unsigned dim = 0;
+ for (mlir::Value extent : extents) {
+ mlir::Value dimIndex =
+ builder.createIntegerConstant(loc, indexType, dim++);
+ fir::runtime::genAllocatableSetBounds(builder, loc, boxAlloc, dimIndex,
+ one, extent);
+ }
+
+ if (!typeParams.empty()) {
+ // We should call AllocatableSetDerivedLength() here.
+ TODO(loc,
+ "polymorphic type with length parameters in PackArrayConversion");
+ }
+
+ fir::runtime::genAllocatableAllocate(builder, loc, boxAlloc);
+ return builder.create<fir::LoadOp>(loc, boxAlloc);
+ }
+
+ // Allocate non-polymorphic temporary on stack or in heap.
+ mlir::Value newBuffer;
+ if (useStack)
+ newBuffer =
+ builder.createTemporary(loc, tempType, bufferName, extents, typeParams);
+ else
+ newBuffer = builder.createHeapTemporary(loc, tempType, bufferName, extents,
+ typeParams);
+
+ mlir::Type ptrType = newBuffer.getType();
+ mlir::Type tempBoxType = fir::BoxType::get(mlir::isa<fir::HeapType>(ptrType)
+ ? ptrType
+ : fir::unwrapRefType(ptrType));
+ mlir::Value shape = builder.genShape(loc, extents);
+ mlir::Value newBox =
+ builder.createBox(loc, tempBoxType, newBuffer, shape, /*slice=*/nullptr,
+ typeParams, /*tdesc=*/nullptr);
+ return newBox;
+}
+
+mlir::LogicalResult
+UnpackArrayConversion::matchAndRewrite(fir::UnpackArrayOp op,
+ mlir::PatternRewriter &rewriter) const {
+ mlir::Location loc = op.getLoc();
+ fir::FirOpBuilder builder(rewriter, op.getOperation());
+ mlir::Type predicateType = builder.getI1Type();
+ mlir::Type indexType = builder.getIndexType();
+ mlir::Value tempBox = op.getTemp();
+ mlir::Value originalBox = op.getOriginal();
+
+ // For now we have to always check if the box is present.
+ auto isPresent =
+ builder.create<fir::IsPresentOp>(loc, predicateType, originalBox);
+
+ builder.genIfThen(loc, isPresent).genThen([&]() {
+ mlir::Type addrType =
+ fir::HeapType::get(fir::extractSequenceType(tempBox.getType()));
+ mlir::Value tempAddr =
+ builder.create<fir::BoxAddrOp>(loc, addrType, tempBox);
+ mlir::Value tempAddrAsIndex =
+ builder.createConvert(loc, indexType, tempAddr);
+ mlir::Value originalAddr =
+ builder.create<fir::BoxAddrOp>(loc, addrType, originalBox);
+ originalAddr = builder.createConvert(loc, indexType, originalAddr);
+
+ auto isNotSame = builder.create<mlir::arith::CmpIOp>(
+ loc, mlir::arith::CmpIPredicate::ne, tempAddrAsIndex, originalAddr);
+ builder.genIfThen(loc, isNotSame).genThen([&]() {});
+ // Copy from temporary to the original.
+ if (!op.getNoCopy())
+ fir::runtime::genShallowCopy(builder, loc, originalBox, tempBox,
+ /*resultIsAllocated=*/true);
+
+ // Deallocate, if it was allocated in heap.
+ if (!op.getStack())
+ builder.create<fir::FreeMemOp>(loc, tempAddr);
+ });
+ rewriter.eraseOp(op);
+ return mlir::success();
+}
+
+namespace {
+class LowerRepackArraysPass
+ : public fir::impl::LowerRepackArraysPassBase<LowerRepackArraysPass> {
+public:
+ using LowerRepackArraysPassBase<
+ LowerRepackArraysPass>::LowerRepackArraysPassBase;
+
+ void runOnOperation() override final {
+ auto *context = &getContext();
+ mlir::ModuleOp module = getOperation();
+ std::optional<mlir::DataLayout> dl = fir::support::getOrSetMLIRDataLayout(
+ module, /*allowDefaultLayout=*/false);
+ mlir::RewritePatternSet patterns(context);
+ patterns.insert<PackArrayConversion>(context, dl);
+ patterns.insert<UnpackArrayConversion>(context, dl);
+ mlir::GreedyRewriteConfig config;
+ config.enableRegionSimplification =
+ mlir::GreedySimplifyRegionLevel::Disabled;
+ (void)applyPatternsGreedily(module, std::move(patterns), config);
+ }
+};
+
+} // anonymous namespace
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 3aea021e596f6..6ec19556625bc 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -198,6 +198,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createPolymorphicOpConversion());
pm.addPass(fir::createAssumedRankOpConversion());
+ pm.addPass(fir::createLowerRepackArraysPass());
// Expand FIR operations that may use SCF dialect for their
// implementation. This is a mandatory pass.
pm.addPass(fir::createSimplifyFIROperations(
diff --git a/flang/test/Driver/bbc-mlir-pass-pipeline.f90 b/flang/test/Driver/bbc-mlir-pass-pipeline.f90
index 276ef818622a1..137c19608c38f 100644
--- a/flang/test/Driver/bbc-mlir-pass-pipeline.f90
+++ b/flang/test/Driver/bbc-mlir-pass-pipeline.f90
@@ -47,6 +47,7 @@
! CHECK-NEXT: PolymorphicOpConversion
! CHECK-NEXT: AssumedRankOpConversion
+! CHECK-NEXT: LowerRepackArraysPass
! CHECK-NEXT: SimplifyFIROperations
! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
diff --git a/flang/test/Driver/mlir-debug-pass-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/132080
More information about the flang-commits
mailing list