[flang-commits] [flang] 3d092e3 - Add a new memory allocation rewrite pass.

Eric Schweitz via flang-commits flang-commits at lists.llvm.org
Wed Dec 15 15:45:23 PST 2021


Author: Eric Schweitz
Date: 2021-12-15T15:45:08-08:00
New Revision: 3d092e31c1f5fab9b427994970616bc5dde3b3be

URL: https://github.com/llvm/llvm-project/commit/3d092e31c1f5fab9b427994970616bc5dde3b3be
DIFF: https://github.com/llvm/llvm-project/commit/3d092e31c1f5fab9b427994970616bc5dde3b3be.diff

LOG: Add a new memory allocation rewrite pass.

This pass can reclassify memory allocations (fir.alloca, fir.allocmem)
based on heuristics and settings. The intention is to allow better
performance and workarounds for conditions such as environments with
limited stack space.

Currently, it implements two conversions from stack to heap allocation:
  1. If a stack allocation is an array larger than some threshold value
     make it a heap allocation.
  2. If a stack allocation is an array with a runtime evaluated size make
     it a heap allocation.

Add a lit test for both suboptions.

Reviewed By: PeteSteinfeld, vdonaldson

Differential Revision: https://reviews.llvm.org/D115763

Added: 
    flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
    flang/test/Fir/memory-allocation-opt.fir

Modified: 
    flang/include/flang/Optimizer/Transforms/Passes.h
    flang/include/flang/Optimizer/Transforms/Passes.td
    flang/lib/Optimizer/Transforms/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 91d260b5543dd..f0b9d1137386a 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -34,6 +34,7 @@ std::unique_ptr<mlir::Pass> createCharacterConversionPass();
 std::unique_ptr<mlir::Pass> createExternalNameConversionPass();
 std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
 std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
+std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
 
 /// Support for inlining on FIR.
 bool canLegallyInline(mlir::Operation *op, mlir::Region *reg,

diff  --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 58ddc465532a7..ec990476f27db 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -152,4 +152,22 @@ def MemRefDataFlowOpt : FunctionPass<"fir-memref-dataflow-opt"> {
   ];
 }
 
+// Declares the `memory-allocation-opt` pass, which runs on `mlir::FuncOp` and
+// is constructed through `::fir::createMemoryAllocationPass()`. By default
+// `dynamic-array-on-heap` is off and `maximum-array-alloc-size` is the all-ones
+// `std::size_t` value.
+def MemoryAllocationOpt : Pass<"memory-allocation-opt", "mlir::FuncOp"> {
+  let summary = "Convert stack to heap allocations and vice versa.";
+  let description = [{
+    Convert stack allocations to heap allocations and vice versa based on
+    estimated size, lifetime, usage patterns, the call tree, etc.
+  }];
+  let dependentDialects = [ "fir::FIROpsDialect" ];
+  let options = [
+    Option<"dynamicArrayOnHeap", "dynamic-array-on-heap",
+           "bool", /*default=*/"false",
+           "Allocate all arrays with runtime determined size on heap.">,
+    Option<"maxStackArraySize", "maximum-array-alloc-size",
+           "std::size_t", /*default=*/"~static_cast<std::size_t>(0)",
+           "Set maximum number of elements of an array allocated on the stack.">
+  ];
+  let constructor = "::fir::createMemoryAllocationPass()";
+}
+
 #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES

diff  --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 642e40e0fd111..767145e171aeb 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -6,6 +6,7 @@ add_flang_library(FIRTransforms
   ArrayValueCopy.cpp
   Inliner.cpp
   ExternalNameConversion.cpp
+  MemoryAllocation.cpp
   MemRefDataFlowOpt.cpp
   RewriteLoop.cpp
 

diff  --git a/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
new file mode 100644
index 0000000000000..4c0144f757186
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
@@ -0,0 +1,186 @@
+//===- MemoryAllocation.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Diagnostics.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/Passes.h"
+#include "llvm/ADT/TypeSwitch.h"
+
+#define DEBUG_TYPE "flang-memory-allocation-opt"
+
+// Sentinel threshold: array element count never forces a heap allocation.
+static constexpr std::size_t UnlimitedArraySize = ~static_cast<std::size_t>(0);
+
+namespace {
+struct MemoryAllocationOptions {
+  // When set, every array whose size is only known at runtime is placed on
+  // the heap. The extra heap traffic may increase fragmentation and can
+  // therefore hurt performance.
+  bool dynamicArrayOnHeap{false};
+
+  // Largest number of elements an array may have and still stay on the stack;
+  // larger arrays are moved to the heap. Useful in environments with a limited
+  // stack size, where big arrays could otherwise exhaust the stack.
+  std::size_t maxStackArraySize{UnlimitedArraySize};
+};
+
+/// Analysis that records, for an `mlir::FuncOp`, the `mlir::ReturnOp`s found
+/// directly in its blocks. Results are keyed by the function operation.
+class ReturnAnalysis {
+public:
+  ReturnAnalysis(mlir::Operation *op) {
+    auto func = mlir::dyn_cast<mlir::FuncOp>(op);
+    // Anything that is not a function contributes no returns.
+    if (!func)
+      return;
+    for (mlir::Block &block : func) {
+      for (mlir::Operation &candidate : block) {
+        if (!mlir::isa<mlir::ReturnOp>(candidate))
+          continue;
+        // Record the first return of this block and move on to the next one.
+        returnMap[op].push_back(&candidate);
+        break;
+      }
+    }
+  }
+
+  /// Return a copy of the return operations recorded for `func`, or an empty
+  /// vector when nothing was recorded for it.
+  llvm::SmallVector<mlir::Operation *> getReturns(mlir::Operation *func) const {
+    return returnMap.lookup(func);
+  }
+
+private:
+  llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *>>
+      returnMap;
+};
+} // namespace
+
+/// Decide whether `alloca` remains a stack allocation (`fir.alloca`) or should
+/// be moved to the heap (`fir.allocmem`).
+///
+/// \param alloca  the stack allocation under consideration.
+/// \param entry   the block the allocation must live in to be considered.
+/// \param options the switch and threshold controlling the decision.
+/// \return `true` if the allocation is to remain on the stack, `false` if it
+///         should be moved to the heap.
+static inline bool keepStackAllocation(fir::AllocaOp alloca, mlir::Block *entry,
+                                       const MemoryAllocationOptions &options) {
+  // Limitation: only arrays allocated on the stack in the entry block are
+  // considered for now.
+  // TODO: Generalize the algorithm and placement of the freemem nodes.
+  if (alloca->getBlock() != entry)
+    return true;
+  auto seqTy = alloca.getInType().dyn_cast<fir::SequenceType>();
+  // Only array allocations are candidates for the heap.
+  if (!seqTy)
+    return true;
+  // Arrays with a runtime determined size move to the heap only on request.
+  if (fir::hasDynamicSize(seqTy))
+    return !options.dynamicArrayOnHeap;
+
+  std::int64_t numberOfElements = 1;
+  for (std::int64_t extent : seqTy.getShape()) {
+    // If the count is suspicious (a non-positive extent, or a product that
+    // does not fit in 64 bits), then don't change anything here. The overflow
+    // test has to come before the multiplication: signed overflow is undefined
+    // behavior, so checking the sign of the product afterwards is unreliable.
+    if (extent <= 0 || numberOfElements > INT64_MAX / extent)
+      return true;
+    numberOfElements *= extent;
+  }
+  // If the number of elements exceeds the threshold, move the allocation to
+  // the heap. Compare as 64-bit unsigned so that the count is not truncated
+  // on hosts where std::size_t is narrower than 64 bits.
+  if (static_cast<std::uint64_t>(numberOfElements) >
+      options.maxStackArraySize) {
+    LLVM_DEBUG(llvm::dbgs()
+               << "memory allocation opt: found " << alloca << '\n');
+    return false;
+  }
+  return true;
+}
+
+namespace {
+/// Rewrite pattern that turns a stack allocation (`fir.alloca`) into a heap
+/// allocation (`fir.allocmem`). A `fir.freemem` of the new allocation is
+/// inserted immediately before each return operation given at construction,
+/// and the original result is replaced by a `fir.convert` of the heap value to
+/// a reference type.
+class AllocaOpConversion : public mlir::OpRewritePattern<fir::AllocaOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  /// \param ctx  the context the pattern is created in.
+  /// \param rets the return operations before which the memory is freed. The
+  ///             list is copied, so the caller's storage does not have to
+  ///             outlive the pattern.
+  AllocaOpConversion(mlir::MLIRContext *ctx,
+                     llvm::ArrayRef<mlir::Operation *> rets)
+      : OpRewritePattern(ctx), returnOps(rets.begin(), rets.end()) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::AllocaOp alloca,
+                  mlir::PatternRewriter &rewriter) const override {
+    auto loc = alloca.getLoc();
+    mlir::Type varTy = alloca.getInType();
+    // The name attributes are optional; an absent one becomes an empty string.
+    auto unpackName =
+        [](llvm::Optional<llvm::StringRef> opt) -> llvm::StringRef {
+      if (opt)
+        return *opt;
+      return {};
+    };
+    auto uniqName = unpackName(alloca.uniq_name());
+    auto bindcName = unpackName(alloca.bindc_name());
+    auto heap = rewriter.create<fir::AllocMemOp>(
+        loc, varTy, uniqName, bindcName, alloca.typeparams(), alloca.shape());
+    // Emit the frees in front of the returns, then come back to the alloca so
+    // that the replacement below is created at the original position.
+    auto insPt = rewriter.saveInsertionPoint();
+    for (mlir::Operation *retOp : returnOps) {
+      rewriter.setInsertionPoint(retOp);
+      [[maybe_unused]] auto free = rewriter.create<fir::FreeMemOp>(loc, heap);
+      LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: add free " << free
+                              << " for " << heap << '\n');
+    }
+    rewriter.restoreInsertionPoint(insPt);
+    rewriter.replaceOpWithNewOp<fir::ConvertOp>(
+        alloca, fir::ReferenceType::get(varTy), heap);
+    LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: replaced " << alloca
+                            << " with " << heap << '\n');
+    return mlir::success();
+  }
+
+private:
+  // Owning copy of the return operations. The pattern is stored in a pattern
+  // list and applied after its constructor argument may have been destroyed,
+  // so keeping only a non-owning llvm::ArrayRef would dangle whenever a caller
+  // passes a temporary vector.
+  llvm::SmallVector<mlir::Operation *> returnOps;
+};
+
+/// This pass can reclassify memory allocations (fir.alloca, fir.allocmem) based
+/// on heuristics and settings. The intention is to allow better performance and
+/// workarounds for conditions such as environments with limited stack space.
+///
+/// Currently, it implements two conversions from stack to heap allocation:
+///   1. If a stack allocation is an array larger than some threshold value
+///      make it a heap allocation.
+///   2. If a stack allocation is an array with a runtime evaluated size make
+///      it a heap allocation.
+class MemoryAllocationOpt
+    : public fir::MemoryAllocationOptBase<MemoryAllocationOpt> {
+public:
+  /// Convert every `fir.alloca` of the function that `keepStackAllocation`
+  /// rejects; signal pass failure if the partial conversion fails.
+  void runOnOperation() override {
+    auto func = getOperation();
+
+    // If func is a declaration, skip it.
+    if (func.empty())
+      return;
+
+    auto *context = &getContext();
+    mlir::OwningRewritePatternList patterns(context);
+    mlir::ConversionTarget target(*context);
+    MemoryAllocationOptions options = {dynamicArrayOnHeap.getValue(),
+                                       maxStackArraySize.getValue()};
+
+    const auto &analysis = getAnalysis<ReturnAnalysis>();
+    // Hold the return operations in a named local for the whole conversion.
+    // getReturns() yields a temporary vector and the AllocaOpConversion
+    // constructor takes a non-owning llvm::ArrayRef, which must never be bound
+    // to a temporary that is destroyed before the patterns are applied.
+    llvm::SmallVector<mlir::Operation *> returnOps = analysis.getReturns(func);
+
+    target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithmeticDialect,
+                           mlir::StandardOpsDialect>();
+    // An alloca is legal (left alone) exactly when it is to stay on the stack.
+    target.addDynamicallyLegalOp<fir::AllocaOp>([&](fir::AllocaOp alloca) {
+      return keepStackAllocation(alloca, &func.front(), options);
+    });
+
+    patterns.insert<AllocaOpConversion>(context, returnOps);
+    if (mlir::failed(
+            mlir::applyPartialConversion(func, target, std::move(patterns)))) {
+      mlir::emitError(func.getLoc(),
+                      "error in memory allocation optimization\n");
+      signalPassFailure();
+    }
+  }
+};
+} // namespace
+
+/// Create a new instance of the memory allocation rewrite pass.
+std::unique_ptr<mlir::Pass> fir::createMemoryAllocationPass() {
+  std::unique_ptr<mlir::Pass> pass = std::make_unique<MemoryAllocationOpt>();
+  return pass;
+}

diff  --git a/flang/test/Fir/memory-allocation-opt.fir b/flang/test/Fir/memory-allocation-opt.fir
new file mode 100644
index 0000000000000..609773dc15686
--- /dev/null
+++ b/flang/test/Fir/memory-allocation-opt.fir
@@ -0,0 +1,34 @@
+// RUN: fir-opt --memory-allocation-opt="dynamic-array-on-heap=true maximum-array-alloc-size=1024" %s | FileCheck %s
+
+// Test for size of array being too big.
+
+// CHECK-LABEL: func @_QPs1(
+// CHECK: %[[mem:.*]] = fir.allocmem !fir.array<1000123xi32> {bindc_name = "array", uniq_name = "_QFs1Earray"}
+// CHECK: fir.call @_QPs3(
+// CHECK: fir.freemem %[[mem]]
+// CHECK-NEXT: return
+
+func @_QPs1() {
+  // Constant-size array of 1000123 elements, which is above the
+  // maximum-array-alloc-size=1024 limit given on the RUN line.
+  %0 = fir.alloca !fir.array<1000123xi32> {bindc_name = "array", uniq_name = "_QFs1Earray"}
+  fir.call @_QPs3(%0) : (!fir.ref<!fir.array<1000123xi32>>) -> ()
+  return
+}
+
+// Test for dynamic array.
+
+// CHECK-LABEL: func @_QPs2(
+// CHECK: %[[mem:.*]] = fir.allocmem !fir.array<?xi32>, %{{[0-9]+}} {bindc_name = "array", uniq_name = "_QFs2Earray"}
+// CHECK: fir.call @_QPs3(
+// CHECK: fir.freemem %[[mem]]
+// CHECK-NEXT: return
+
+func @_QPs2(%arg0: !fir.ref<i32>) {
+  %0 = fir.load %arg0 : !fir.ref<i32>
+  %1 = fir.convert %0 : (i32) -> index
+  // The extent of the array is the runtime value %1 (type !fir.array<?xi32>);
+  // the RUN line sets dynamic-array-on-heap=true.
+  %2 = fir.alloca !fir.array<?xi32>, %1 {bindc_name = "array", uniq_name = "_QFs2Earray"}
+  %3 = fir.convert %2 : (!fir.ref<!fir.array<?xi32>>) -> !fir.ref<!fir.array<1000123xi32>>
+  fir.call @_QPs3(%3) : (!fir.ref<!fir.array<1000123xi32>>) -> ()
+  return
+}
+func private @_QPs3(!fir.ref<!fir.array<1000123xi32>>)
+


        


More information about the flang-commits mailing list