[flang-commits] [flang] 6e193b5 - Prepare for inlining of SUM intrinsic
Mats Petersson via flang-commits
flang-commits at lists.llvm.org
Thu Aug 4 12:56:47 PDT 2022
Author: Mats Petersson
Date: 2022-08-04T20:56:19+01:00
New Revision: 6e193b5cbb6d7591280e4d03a658bb11f9fcd4d9
URL: https://github.com/llvm/llvm-project/commit/6e193b5cbb6d7591280e4d03a658bb11f9fcd4d9
DIFF: https://github.com/llvm/llvm-project/commit/6e193b5cbb6d7591280e4d03a658bb11f9fcd4d9.diff
LOG: Prepare for inlining of SUM intrinsic
Find calls to FortranASum{Real8,Integer4}, check for dim and mask
arguments being absent - then produce an inlineable simple
version of the sum function.
(No longer a prototype, please review for push to llvm/main - not sure how to make Phabricator update the review with actual commit message)
Reviewed By: peixin, awarzynski
Differential Revision: https://reviews.llvm.org/D125407
Added:
flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
flang/test/Transforms/simplifyintrinsics.fir
Modified:
flang/include/flang/Optimizer/Transforms/Passes.h
flang/include/flang/Optimizer/Transforms/Passes.td
flang/lib/Optimizer/Transforms/CMakeLists.txt
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 7896fd3a90069..e6e2bbbda83e5 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -35,6 +35,8 @@ std::unique_ptr<mlir::Pass> createExternalNameConversionPass();
std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
+std::unique_ptr<mlir::Pass> createSimplifyIntrinsicsPass();
+
std::unique_ptr<mlir::Pass>
createMemoryAllocationPass(bool dynOnHeap, std::size_t maxStackSize);
std::unique_ptr<mlir::Pass> createAnnotateConstantOperandsPass();
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 442f542bf897b..b60de00838cae 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -174,6 +174,20 @@ def MemRefDataFlowOpt : Pass<"fir-memref-dataflow-opt", "::mlir::func::FuncOp">
];
}
+// This needs to be a "mlir::ModuleOp" pass, because it inserts simplified
+// functions into the module, which is invalid if a finer grain mlir::Operation
+// is used as the pass specification says to not touch things outside the scope
+// of the operation being processed.
+def SimplifyIntrinsics : Pass<"simplify-intrinsics", "mlir::ModuleOp"> {
+ let summary = "Intrinsics simplification";
+ let description = [{
+ Qualifying intrinsics calls are replaced with calls to a specialized and
+ simplified function. The simplified function is added to the current module.
+ This function can be inlined by a general purpose inlining pass.
+ }];
+ let constructor = "::fir::createSimplifyIntrinsicsPass()";
+}
+
def MemoryAllocationOpt : Pass<"memory-allocation-opt", "mlir::func::FuncOp"> {
let summary = "Convert stack to heap allocations and vice versa.";
let description = [{
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 2fd4aceb735a7..60891438ad5a5 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -11,6 +11,7 @@ add_flang_library(FIRTransforms
RewriteLoop.cpp
SimplifyRegionLite.cpp
AlgebraicSimplification.cpp
+ SimplifyIntrinsics.cpp
DEPENDS
FIRBuilder
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
new file mode 100644
index 0000000000000..cc30694ff4769
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -0,0 +1,237 @@
+//===- SimplifyIntrinsics.cpp -- replace intrinsics with simpler form -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass looks for suitable calls to runtime library for intrinsics that
+/// can be simplified/specialized and replaces with a specialized function.
+///
+/// For example, SUM(arr) can be specialized as a simple function with one loop,
+/// compared to the three arguments (plus file & line info) that the runtime
+/// call has - when the argument is a 1D-array (multiple loops may be needed
+/// for higher dimension arrays, of course).
+///
+/// The general idea is that besides making the call simpler, it can also be
+/// inlined by other passes that run after this pass, which further improves
+/// performance, particularly when the work done in the function is trivial
+/// and small in size.
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Support/FIRContext.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/RegionUtils.h"
+
+namespace {
+
+/// Module-level pass that replaces qualifying calls to the SUM runtime
+/// functions with calls to a simplified function added to the same module.
+class SimplifyIntrinsicsPass
+    : public fir::SimplifyIntrinsicsBase<SimplifyIntrinsicsPass> {
+public:
+  /// Return the function named `baseName` + "_simplified" from the module
+  /// that \p builder operates on, creating it first if it does not exist.
+  ///
+  /// \param loc      location attached to every operation that is created.
+  /// \param builder  builder used to create the function and its body; its
+  ///                 insertion point is the same on return as on entry.
+  /// \param type     element type of the array and result type of the
+  ///                 function.
+  /// \param baseName name of the runtime function being replaced.
+  mlir::func::FuncOp getOrCreateFunction(const mlir::Location &loc,
+                                         fir::FirOpBuilder &builder,
+                                         const mlir::Type &type,
+                                         const mlir::StringRef &baseName);
+
+  /// Walk the module and rewrite every call that qualifies.
+  void runOnOperation() override;
+};
+
+} // namespace
+
+/// Return the simplified replacement for the runtime function \p baseName,
+/// building it on first use.
+///
+/// The replacement is named `baseName` + "_simplified". If the module already
+/// contains a function with that name it is returned unchanged. Otherwise a
+/// new function taking a single `!fir.box<none>` and returning \p type is
+/// created, given linkonce_odr linkage, and filled with a loop that
+/// accumulates the elements of the boxed array. The builder's insertion point
+/// is saved before the body is built and restored before returning.
+mlir::func::FuncOp SimplifyIntrinsicsPass::getOrCreateFunction(
+ const mlir::Location &loc, fir::FirOpBuilder &builder,
+ const mlir::Type &type, const mlir::StringRef &baseName) {
+ // In future, the idea is that instead of building the function inside
+ // this function, this does the base creation, and calls a callback
+ // function (e.g. a lambda function) that fills in the actual content.
+ // For now, check that it is ONLY the SUM runtime call.
+ assert(baseName.startswith("_FortranASum"));
+
+ std::string replacementName = mlir::Twine{baseName, "_simplified"}.str();
+ mlir::ModuleOp module = builder.getModule();
+ // If we already have a function, just return it.
+ mlir::func::FuncOp newFunc =
+ fir::FirOpBuilder::getNamedFunction(module, replacementName);
+ if (newFunc)
+ return newFunc;
+
+ // Need to build the function!
+ // Basic idea:
+ // function FortranASum<T>_simplified(arr)
+ // T, dimension(:) :: arr
+ // T sum = 0
+ // integer iter
+ // do iter = 0, extent(arr) - 1
+ // sum = sum + arr[iter]
+ // end do
+ // FortranASum<T>_simplified = sum
+ // end function FortranASum<T>_simplified
+ mlir::Type boxType = fir::BoxType::get(builder.getNoneType());
+ mlir::FunctionType fType =
+ mlir::FunctionType::get(builder.getContext(), {boxType}, {type});
+ newFunc =
+ fir::FirOpBuilder::createFunction(loc, module, replacementName, fType);
+ // Mark the new function linkonce_odr via the "llvm.linkage" attribute.
+ auto inlineLinkage = mlir::LLVM::linkage::Linkage::LinkonceODR;
+ auto linkage =
+ mlir::LLVM::LinkageAttr::get(builder.getContext(), inlineLinkage);
+ newFunc->setAttr("llvm.linkage", linkage);
+
+ // Save the position of the original call.
+ mlir::OpBuilder::InsertPoint insertPt = builder.saveInsertionPoint();
+ builder.setInsertionPointToEnd(newFunc.addEntryBlock());
+
+ mlir::IndexType idxTy = builder.getIndexType();
+
+ // The accumulator lives in a stack slot initialised to zero; a real
+ // constant is used for floating-point types, an integer constant otherwise.
+ mlir::Value zero = type.isa<mlir::FloatType>()
+ ? builder.createRealConstant(loc, type, 0.0)
+ : builder.createIntegerConstant(loc, type, 0);
+ mlir::Value sum = builder.create<fir::AllocaOp>(loc, type);
+ builder.create<fir::StoreOp>(loc, zero, sum);
+
+ mlir::Block::BlockArgListType args = newFunc.front().getArguments();
+ mlir::Value arg = args[0];
+
+ mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
+
+ // Reinterpret the untyped box argument as a box of a rank-1 array of
+ // unknown extent with element type `type`.
+ fir::SequenceType::Shape flatShape = {fir::SequenceType::getUnknownExtent()};
+ mlir::Type arrTy = fir::SequenceType::get(flatShape, type);
+ mlir::Type boxArrTy = fir::BoxType::get(arrTy);
+ mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, arg);
+ // Query dimension 0 of the box; result 1 is used as the element count.
+ auto dims =
+ builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, zeroIdx);
+ mlir::Value len = dims.getResult(1);
+ mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+ mlir::Value step = one;
+
+ // We use C indexing here, so len-1 as loopcount
+ mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
+ auto loop = builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step);
+
+ // Begin loop code
+ mlir::OpBuilder::InsertPoint loopEndPt = builder.saveInsertionPoint();
+ builder.setInsertionPointToStart(loop.getBody());
+
+ mlir::Type eleRefTy = builder.getRefType(type);
+ mlir::Value index = loop.getInductionVar();
+ mlir::Value addr =
+ builder.create<fir::CoordinateOp>(loc, eleRefTy, array, index);
+ mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
+ mlir::Value sumVal = builder.create<fir::LoadOp>(loc, sum);
+
+ // Pick the add operation that matches the element type. The caller in this
+ // file only passes a 32-bit integer or f64 type; anything else is a TODO.
+ mlir::Value res;
+ if (type.isa<mlir::FloatType>())
+ res = builder.create<mlir::arith::AddFOp>(loc, elem, sumVal);
+ else if (type.isa<mlir::IntegerType>())
+ res = builder.create<mlir::arith::AddIOp>(loc, elem, sumVal);
+ else
+ TODO(loc, "Unsupported type");
+
+ builder.create<fir::StoreOp>(loc, res, sum);
+ // End of loop.
+ builder.restoreInsertionPoint(loopEndPt);
+
+ // Return the accumulated value.
+ mlir::Value resultVal = builder.create<fir::LoadOp>(loc, sum);
+ builder.create<mlir::func::ReturnOp>(loc, resultVal);
+
+ // Now back to where we were adding code earlier...
+ builder.restoreInsertionPoint(insertPt);
+
+ return newFunc;
+}
+
+/// Return true if \p val is the result of a fir.convert whose input is
+/// produced by fir.absent.
+///
+/// Only fir.convert is looked through. Inspecting operand 0 of an arbitrary
+/// defining operation is out of bounds for operations without operands and
+/// gives a false positive for unrelated operations (e.g. a call) that merely
+/// take an absent value as their first operand.
+static bool isOperandAbsent(mlir::Value val) {
+  auto convert = mlir::dyn_cast_or_null<fir::ConvertOp>(val.getDefiningOp());
+  if (!convert)
+    return false;
+  return mlir::isa_and_nonnull<fir::AbsentOp>(
+      convert->getOperand(0).getDefiningOp());
+}
+
+/// Return true if \p val is the result of a fir.convert whose input is
+/// defined by an operation matching a constant zero.
+///
+/// Only fir.convert is looked through. Inspecting operand 0 of an arbitrary
+/// defining operation is out of bounds for operations without operands (such
+/// as a constant passed directly) and would treat any operation whose first
+/// operand is zero as being zero itself.
+static bool isZero(mlir::Value val) {
+  auto convert = mlir::dyn_cast_or_null<fir::ConvertOp>(val.getDefiningOp());
+  if (!convert)
+    return false;
+  if (mlir::Operation *defOp = convert->getOperand(0).getDefiningOp())
+    return mlir::matchPattern(defOp, mlir::m_Zero());
+  return false;
+}
+
+/// Follow the chain of first operands starting at the operation defining
+/// \p val until a fir.embox is found, and return the shape operand of that
+/// fir.embox. Returns a null value when the chain ends (block argument or an
+/// operation without operands) before a fir.embox is reached.
+static mlir::Value findShape(mlir::Value val) {
+  mlir::Operation *defOp = val.getDefiningOp();
+  // Stop at operations with no operands: there is nothing further to follow
+  // and asking for operand 0 would be out of bounds.
+  while (defOp && defOp->getNumOperands() > 0) {
+    defOp = defOp->getOperand(0).getDefiningOp();
+    if (auto box = mlir::dyn_cast_or_null<fir::EmboxOp>(defOp))
+      return box.getShape();
+  }
+  return {};
+}
+
+/// Return the rank of the shape found for \p val by findShape(), or 0 when
+/// no shape is found.
+static unsigned getDimCount(mlir::Value val) {
+  // Use the type of the shape value itself: this needs no defining operation
+  // (so it is safe for block arguments) and is correct whichever result of
+  // its defining operation the value is.
+  if (mlir::Value shapeVal = findShape(val))
+    return fir::getRankOfShapeType(shapeVal.getType());
+  return 0;
+}
+
+/// Walk every operation in the module and replace qualifying fir.call
+/// operations to the SUM runtime functions with a call to the simplified
+/// function returned by getOrCreateFunction().
+///
+/// A call qualifies when its callee name starts with "_FortranASum" and ends
+/// with "Integer4" or "Real8", it has the five operands of the runtime
+/// prototype, the dim operand is zero, the mask operand is absent, and the
+/// array operand has rank 1. All other calls are left untouched.
+void SimplifyIntrinsicsPass::runOnOperation() {
+  mlir::ModuleOp module = getOperation();
+  fir::KindMapping kindMap = fir::getKindMapping(module);
+  module.walk([&](mlir::Operation *op) {
+    auto call = mlir::dyn_cast<fir::CallOp>(op);
+    if (!call)
+      return;
+    mlir::SymbolRefAttr callee = call.getCalleeAttr();
+    if (!callee)
+      return;
+
+    // Replace call to runtime function for SUM when it has single
+    // argument (no dim or mask argument) for 1D arrays with either
+    // Integer4 or Real8 types. Other forms are ignored.
+    // The new function is added to the module.
+    //
+    // Prototype for runtime call (from sum.cpp):
+    // RTNAME(Sum<T>)(const Descriptor &x, const char *source, int line,
+    //                int dim, const Descriptor *mask)
+    mlir::StringRef funcName = callee.getLeafReference().getValue();
+    if (!funcName.startswith("_FortranASum"))
+      return;
+
+    // The name check is only a prefix match, so make sure the call really
+    // has the five operands of the prototype above before indexing them.
+    mlir::Operation::operand_range args = call.getArgs();
+    if (args.size() != 5)
+      return;
+
+    // args[1] and args[2] are source filename and line number, ignored.
+    mlir::Value dim = args[3];
+    mlir::Value mask = args[4];
+    // dim is zero when it is absent, which is an implementation
+    // detail in the runtime library.
+    bool dimAndMaskAbsent = isZero(dim) && isOperandAbsent(mask);
+    unsigned rank = getDimCount(args[0]);
+    if (!dimAndMaskAbsent || rank != 1)
+      return;
+
+    mlir::Location loc = call.getLoc();
+    fir::FirOpBuilder builder(op, kindMap);
+    mlir::Type type;
+    if (funcName.endswith("Integer4"))
+      type = mlir::IntegerType::get(builder.getContext(), 32);
+    else if (funcName.endswith("Real8"))
+      type = mlir::FloatType::getF64(builder.getContext());
+    else
+      return;
+
+    mlir::func::FuncOp newFunc =
+        getOrCreateFunction(loc, builder, type, funcName);
+    // Only the array descriptor is forwarded to the simplified function.
+    auto newCall = builder.create<fir::CallOp>(loc, newFunc,
+                                               mlir::ValueRange{args[0]});
+    call->replaceAllUsesWith(newCall.getResults());
+    call->dropAllReferences();
+    call->erase();
+  });
+}
+
+/// Create a new instance of the intrinsics simplification pass.
+std::unique_ptr<mlir::Pass> fir::createSimplifyIntrinsicsPass() {
+ return std::make_unique<SimplifyIntrinsicsPass>();
+}
diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir
new file mode 100644
index 0000000000000..69f2cb8ae658c
--- /dev/null
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -0,0 +1,319 @@
+// RUN: fir-opt --split-input-file --simplify-intrinsics %s | FileCheck %s
+
+// Call to SUM with 1D I32 array is replaced.
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} {
+ func.func @sum_1d_array_int(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}) -> i32 {
+ %c10 = arith.constant 10 : index
+ %0 = fir.alloca i32 {bindc_name = "test_sum_2", uniq_name = "_QFtest_sum_2Etest_sum_2"}
+ %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+ %2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+ %3 = fir.absent !fir.box<i1>
+ %c0 = arith.constant 0 : index
+ %4 = fir.address_of(@_QQcl.2E2F6973756D5F322E66393000) : !fir.ref<!fir.char<1,13>>
+ %c5_i32 = arith.constant 5 : i32
+ %5 = fir.convert %2 : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
+ %6 = fir.convert %4 : (!fir.ref<!fir.char<1,13>>) -> !fir.ref<i8>
+ %7 = fir.convert %c0 : (index) -> i32
+ %8 = fir.convert %3 : (!fir.box<i1>) -> !fir.box<none>
+ %9 = fir.call @_FortranASumInteger4(%5, %6, %c5_i32, %7, %8) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32
+ fir.store %9 to %0 : !fir.ref<i32>
+ %10 = fir.load %0 : !fir.ref<i32>
+ return %10 : i32
+ }
+ func.func private @_FortranASumInteger4(!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32 attributes {fir.runtime}
+ fir.global linkonce @_QQcl.2E2F6973756D5F322E66393000 constant : !fir.char<1,13> {
+ %0 = fir.string_lit "./isum_2.f90\00"(13) : !fir.char<1,13>
+ fir.has_value %0 : !fir.char<1,13>
+ }
+}
+
+
+// CHECK-LABEL: func.func @sum_1d_array_int(
+// CHECK-SAME: %[[A:.*]]: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}) -> i32 {
+// CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
+// CHECK: %[[A_BOX_I32:.*]] = fir.embox %[[A]](%[[SHAPE]]) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+// CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_I32]] : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
+// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}})
+// CHECK: %[[RES:.*]] = fir.call @_FortranASumInteger4_simplified(%[[A_BOX_NONE]]) : (!fir.box<none>) -> i32
+// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}})
+// CHECK: return %{{.*}} : i32
+// CHECK: }
+// CHECK: func.func private @_FortranASumInteger4(!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32 attributes {fir.runtime}
+
+// CHECK-LABEL: func.func private @_FortranASumInteger4_simplified(
+// CHECK-SAME: %[[ARR:.*]]: !fir.box<none>) -> i32 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK: %[[CI32_0:.*]] = arith.constant 0 : i32
+// CHECK: %[[SUM:.*]] = fir.alloca i32
+// CHECK: fir.store %[[CI32_0]] to %[[SUM]] : !fir.ref<i32>
+// CHECK: %[[CINDEX_0:.*]] = arith.constant 0 : index
+// CHECK: %[[ARR_BOX_I32:.*]] = fir.convert %[[ARR]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ARR_BOX_I32]], %[[CINDEX_0]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index
+// CHECK: %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[CINDEX_1]] : index
+// CHECK: fir.do_loop %[[ITER:.*]] = %[[CINDEX_0]] to %[[EXTENT]] step %[[CINDEX_1]] {
+// CHECK: %[[ITEM:.*]] = fir.coordinate_of %[[ARR_BOX_I32]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref<i32>
+// CHECK: %[[SUM_VAL:.*]] = fir.load %[[SUM]] : !fir.ref<i32>
+// CHECK: %[[NEW_SUM:.*]] = arith.addi %[[ITEM_VAL]], %[[SUM_VAL]] : i32
+// CHECK: fir.store %[[NEW_SUM]] to %[[SUM]] : !fir.ref<i32>
+// CHECK: }
+// CHECK: %[[RET:.*]] = fir.load %[[SUM]] : !fir.ref<i32>
+// CHECK: return %[[RET]] : i32
+// CHECK: }
+
+// -----
+
+// Call to SUM with 2D I32 arrays is not replaced.
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} {
+ func.func @sum_2d_array_int(%arg0: !fir.ref<!fir.array<10x10xi32>> {fir.bindc_name = "a"}) -> i32 {
+ %c10 = arith.constant 10 : index
+ %c10_0 = arith.constant 10 : index
+ %0 = fir.alloca i32 {bindc_name = "test_sum_3", uniq_name = "_QFtest_sum_3Etest_sum_3"}
+ %1 = fir.shape %c10, %c10_0 : (index, index) -> !fir.shape<2>
+ %2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<10x10xi32>>, !fir.shape<2>) -> !fir.box<!fir.array<10x10xi32>>
+ %3 = fir.absent !fir.box<i1>
+ %c0 = arith.constant 0 : index
+ %4 = fir.address_of(@_QQcl.2E2F6973756D5F332E66393000) : !fir.ref<!fir.char<1,13>>
+ %c5_i32 = arith.constant 5 : i32
+ %5 = fir.convert %2 : (!fir.box<!fir.array<10x10xi32>>) -> !fir.box<none>
+ %6 = fir.convert %4 : (!fir.ref<!fir.char<1,13>>) -> !fir.ref<i8>
+ %7 = fir.convert %c0 : (index) -> i32
+ %8 = fir.convert %3 : (!fir.box<i1>) -> !fir.box<none>
+ %9 = fir.call @_FortranASumInteger4(%5, %6, %c5_i32, %7, %8) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32
+ fir.store %9 to %0 : !fir.ref<i32>
+ %10 = fir.load %0 : !fir.ref<i32>
+ return %10 : i32
+ }
+ func.func private @_FortranASumInteger4(!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32 attributes {fir.runtime}
+ fir.global linkonce @_QQcl.2E2F6973756D5F332E66393000 constant : !fir.char<1,13> {
+ %0 = fir.string_lit "./isum_3.f90\00"(13) : !fir.char<1,13>
+ fir.has_value %0 : !fir.char<1,13>
+ }
+}
+
+// CHECK-LABEL: func.func @sum_2d_array_int({{.*}} !fir.ref<!fir.array<10x10xi32>> {fir.bindc_name = "a"}) -> i32 {
+// CHECK-NOT: fir.call @_FortranASumInteger4_simplified({{.*}})
+// CHECK: fir.call @_FortranASumInteger4({{.*}}) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32
+// CHECK-NOT: fir.call @_FortranASumInteger4_simplified({{.*}})
+
+// -----
+
+// Call to SUM with 1D F64 is replaced.
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} {
+ func.func @sum_1d_real(%arg0: !fir.ref<!fir.array<10xf64>> {fir.bindc_name = "a"}) -> f64 {
+ %c10 = arith.constant 10 : index
+ %0 = fir.alloca f64 {bindc_name = "sum_1d_real", uniq_name = "_QFsum_1d_realEsum_1d_real"}
+ %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+ %2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf64>>
+ %3 = fir.absent !fir.box<i1>
+ %c0 = arith.constant 0 : index
+ %4 = fir.address_of(@_QQcl.2E2F6973756D5F352E66393000) : !fir.ref<!fir.char<1,13>>
+ %c5_i32 = arith.constant 5 : i32
+ %5 = fir.convert %2 : (!fir.box<!fir.array<10xf64>>) -> !fir.box<none>
+ %6 = fir.convert %4 : (!fir.ref<!fir.char<1,13>>) -> !fir.ref<i8>
+ %7 = fir.convert %c0 : (index) -> i32
+ %8 = fir.convert %3 : (!fir.box<i1>) -> !fir.box<none>
+ %9 = fir.call @_FortranASumReal8(%5, %6, %c5_i32, %7, %8) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> f64
+ fir.store %9 to %0 : !fir.ref<f64>
+ %10 = fir.load %0 : !fir.ref<f64>
+ return %10 : f64
+ }
+ func.func private @_FortranASumReal8(!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> f64 attributes {fir.runtime}
+ fir.global linkonce @_QQcl.2E2F6973756D5F352E66393000 constant : !fir.char<1,13> {
+ %0 = fir.string_lit "./isum_5.f90\00"(13) : !fir.char<1,13>
+ fir.has_value %0 : !fir.char<1,13>
+ }
+}
+
+
+// CHECK-LABEL: func.func @sum_1d_real(
+// CHECK-SAME: %[[A:.*]]: !fir.ref<!fir.array<10xf64>> {fir.bindc_name = "a"}) -> f64 {
+// CHECK: %[[CINDEX_10:.*]] = arith.constant 10 : index
+// CHECK: %[[SHAPE:.*]] = fir.shape %[[CINDEX_10]] : (index) -> !fir.shape<1>
+// CHECK: %[[A_BOX_F64:.*]] = fir.embox %[[A]](%[[SHAPE]]) : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf64>>
+// CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_F64]] : (!fir.box<!fir.array<10xf64>>) -> !fir.box<none>
+// CHECK-NOT: fir.call @_FortranASumReal8({{.*}})
+// CHECK: %[[RES:.*]] = fir.call @_FortranASumReal8_simplified(%[[A_BOX_NONE]]) : (!fir.box<none>) -> f64
+// CHECK-NOT: fir.call @_FortranASumReal8({{.*}})
+// CHECK: return %{{.*}} : f64
+// CHECK: }
+
+// CHECK-LABEL: func.func private @_FortranASumReal8_simplified(
+// CHECK-SAME: %[[ARR:.*]]: !fir.box<none>) -> f64 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK: %[[SUM:.*]] = fir.alloca f64
+// CHECK: fir.store %[[ZERO]] to %[[SUM]] : !fir.ref<f64>
+// CHECK: %[[CINDEX_0:.*]] = arith.constant 0 : index
+// CHECK: %[[ARR_BOX_F64:.*]] = fir.convert %[[ARR]] : (!fir.box<none>) -> !fir.box<!fir.array<?xf64>>
+// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ARR_BOX_F64]], %[[CINDEX_0]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index
+// CHECK: %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[CINDEX_1]] : index
+// CHECK: fir.do_loop %[[ITER:.*]] = %[[CINDEX_0]] to %[[EXTENT]] step %[[CINDEX_1]] {
+// CHECK: %[[ITEM:.*]] = fir.coordinate_of %[[ARR_BOX_F64]], %[[ITER]] : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK: %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref<f64>
+// CHECK: %[[SUM_VAL:.*]] = fir.load %[[SUM]] : !fir.ref<f64>
+// CHECK: %[[NEW_SUM:.*]] = arith.addf %[[ITEM_VAL]], %[[SUM_VAL]] : f64
+// CHECK: fir.store %[[NEW_SUM]] to %[[SUM]] : !fir.ref<f64>
+// CHECK: }
+// CHECK: %[[RES:.*]] = fir.load %[[SUM]] : !fir.ref<f64>
+// CHECK: return %[[RES]] : f64
+// CHECK: }
+
+// -----
+
+// Call to SUM with 1D COMPLEX array is not replaced.
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} {
+ func.func @sum_1d_complex(%arg0: !fir.ref<!fir.array<10x!fir.complex<4>>> {fir.bindc_name = "a"}) -> !fir.complex<4> {
+ %0 = fir.alloca !fir.complex<4>
+ %c10 = arith.constant 10 : index
+ %1 = fir.alloca !fir.complex<4> {bindc_name = "sum_1d_complex", uniq_name = "_QFsum_1d_complexEsum_1d_complex"}
+ %2 = fir.shape %c10 : (index) -> !fir.shape<1>
+ %3 = fir.embox %arg0(%2) : (!fir.ref<!fir.array<10x!fir.complex<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.complex<4>>>
+ %4 = fir.absent !fir.box<i1>
+ %c0 = arith.constant 0 : index
+ %5 = fir.address_of(@_QQcl.2E2F6973756D5F362E66393000) : !fir.ref<!fir.char<1,13>>
+ %c5_i32 = arith.constant 5 : i32
+ %6 = fir.convert %0 : (!fir.ref<!fir.complex<4>>) -> !fir.ref<complex<f32>>
+ %7 = fir.convert %3 : (!fir.box<!fir.array<10x!fir.complex<4>>>) -> !fir.box<none>
+ %8 = fir.convert %5 : (!fir.ref<!fir.char<1,13>>) -> !fir.ref<i8>
+ %9 = fir.convert %c0 : (index) -> i32
+ %10 = fir.convert %4 : (!fir.box<i1>) -> !fir.box<none>
+ %11 = fir.call @_FortranACppSumComplex4(%6, %7, %8, %c5_i32, %9, %10) : (!fir.ref<complex<f32>>, !fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> none
+ %12 = fir.load %0 : !fir.ref<!fir.complex<4>>
+ fir.store %12 to %1 : !fir.ref<!fir.complex<4>>
+ %13 = fir.load %1 : !fir.ref<!fir.complex<4>>
+ return %13 : !fir.complex<4>
+ }
+ func.func private @_FortranACppSumComplex4(!fir.ref<complex<f32>>, !fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> none attributes {fir.runtime}
+ fir.global linkonce @_QQcl.2E2F6973756D5F362E66393000 constant : !fir.char<1,13> {
+ %0 = fir.string_lit "./isum_6.f90\00"(13) : !fir.char<1,13>
+ fir.has_value %0 : !fir.char<1,13>
+ }
+}
+
+// CHECK-LABEL: func.func @sum_1d_complex(%{{.*}}: !fir.ref<!fir.array<10x!fir.complex<4>>> {fir.bindc_name = "a"}) -> !fir.complex<4> {
+// CHECK-NOT: fir.call @_FortranACppSumComplex4_simplified({{.*}})
+// CHECK: fir.call @_FortranACppSumComplex4({{.*}}) : (!fir.ref<complex<f32>>, !fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> none
+// CHECK-NOT: fir.call @_FortranACppSumComplex4_simplified({{.*}})
+
+// -----
+
+// Test that two functions calling the same SUM function
+// generates only ONE function declaration (and that both
+// calls are converted)
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} {
+ func.func @sum_1d_calla(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}) -> i32 {
+ %c10 = arith.constant 10 : index
+ %0 = fir.alloca i32 {bindc_name = "sum_1d_calla", uniq_name = "_QFsum_1d_callaEsum_1d_calla"}
+ %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+ %2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+ %3 = fir.absent !fir.box<i1>
+ %c0 = arith.constant 0 : index
+ %4 = fir.address_of(@_QQcl.2E2F6973756D5F372E66393000) : !fir.ref<!fir.char<1,13>>
+ %c5_i32 = arith.constant 5 : i32
+ %5 = fir.convert %2 : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
+ %6 = fir.convert %4 : (!fir.ref<!fir.char<1,13>>) -> !fir.ref<i8>
+ %7 = fir.convert %c0 : (index) -> i32
+ %8 = fir.convert %3 : (!fir.box<i1>) -> !fir.box<none>
+ %9 = fir.call @_FortranASumInteger4(%5, %6, %c5_i32, %7, %8) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32
+ fir.store %9 to %0 : !fir.ref<i32>
+ %10 = fir.load %0 : !fir.ref<i32>
+ return %10 : i32
+ }
+ func.func @sum_1d_callb(%arg0: !fir.ref<!fir.array<20xi32>> {fir.bindc_name = "a"}) -> i32 {
+ %c20 = arith.constant 20 : index
+ %0 = fir.alloca i32 {bindc_name = "sum_1d_callb", uniq_name = "_QFsum_1d_callbEsum_1d_callb"}
+ %1 = fir.shape %c20 : (index) -> !fir.shape<1>
+ %2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<20xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<20xi32>>
+ %3 = fir.absent !fir.box<i1>
+ %c0 = arith.constant 0 : index
+ %4 = fir.address_of(@_QQcl.2E2F6973756D5F372E66393000) : !fir.ref<!fir.char<1,13>>
+ %c12_i32 = arith.constant 12 : i32
+ %5 = fir.convert %2 : (!fir.box<!fir.array<20xi32>>) -> !fir.box<none>
+ %6 = fir.convert %4 : (!fir.ref<!fir.char<1,13>>) -> !fir.ref<i8>
+ %7 = fir.convert %c0 : (index) -> i32
+ %8 = fir.convert %3 : (!fir.box<i1>) -> !fir.box<none>
+ %9 = fir.call @_FortranASumInteger4(%5, %6, %c12_i32, %7, %8) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32
+ fir.store %9 to %0 : !fir.ref<i32>
+ %10 = fir.load %0 : !fir.ref<i32>
+ return %10 : i32
+ }
+ func.func private @_FortranASumInteger4(!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32 attributes {fir.runtime}
+ fir.global linkonce @_QQcl.2E2F6973756D5F372E66393000 constant : !fir.char<1,13> {
+ %0 = fir.string_lit "./isum_7.f90\00"(13) : !fir.char<1,13>
+ fir.has_value %0 : !fir.char<1,13>
+ }
+}
+
+// CHECK-LABEL: func.func @sum_1d_calla(%{{.*}}) -> i32 {
+// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}})
+// CHECK: fir.call @_FortranASumInteger4_simplified(%{{.*}})
+// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}})
+// CHECK: }
+
+// CHECK-LABEL: func.func @sum_1d_callb(%{{.*}}) -> i32 {
+// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}})
+// CHECK: fir.call @_FortranASumInteger4_simplified(%{{.*}})
+// CHECK-NOT: fir.call @_FortranASumInteger4({{.*}})
+// CHECK: }
+
+// CHECK-LABEL: func.func private @_FortranASumInteger4_simplified({{.*}}) -> i32 {{.*}} {
+// CHECK: return %{{.*}} : i32
+// CHECK: }
+// CHECK-NOT: func.func private @_FortranASumInteger4_simplified({{.*}})
+
+// -----
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "native"} {
+ func.func @sum_1d_stride(%arg0: !fir.ref<!fir.array<20xi32>> {fir.bindc_name = "a"}) -> i32 {
+ %c20 = arith.constant 20 : index
+ %0 = fir.alloca i32 {bindc_name = "sum_1d_stride", uniq_name = "_QFsum_1d_strideEsum_1d_stride"}
+ %c1 = arith.constant 1 : index
+ %c2_i64 = arith.constant 2 : i64
+ %1 = fir.convert %c2_i64 : (i64) -> index
+ %2 = arith.addi %c1, %c20 : index
+ %3 = arith.subi %2, %c1 : index
+ %4 = fir.shape %c20 : (index) -> !fir.shape<1>
+ %5 = fir.slice %c1, %3, %1 : (index, index, index) -> !fir.slice<1>
+ %6 = fir.embox %arg0(%4) [%5] : (!fir.ref<!fir.array<20xi32>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box<!fir.array<?xi32>>
+ %7 = fir.absent !fir.box<i1>
+ %c0 = arith.constant 0 : index
+ %8 = fir.address_of(@_QQcl.2E2F6973756D5F382E66393000) : !fir.ref<!fir.char<1,13>>
+ %c5_i32 = arith.constant 5 : i32
+ %9 = fir.convert %6 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+ %10 = fir.convert %8 : (!fir.ref<!fir.char<1,13>>) -> !fir.ref<i8>
+ %11 = fir.convert %c0 : (index) -> i32
+ %12 = fir.convert %7 : (!fir.box<i1>) -> !fir.box<none>
+ %13 = fir.call @_FortranASumInteger4(%9, %10, %c5_i32, %11, %12) : (!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32
+ fir.store %13 to %0 : !fir.ref<i32>
+ %14 = fir.load %0 : !fir.ref<i32>
+ return %14 : i32
+ }
+ func.func private @_FortranASumInteger4(!fir.box<none>, !fir.ref<i8>, i32, i32, !fir.box<none>) -> i32 attributes {fir.runtime}
+ fir.global linkonce @_QQcl.2E2F6973756D5F382E66393000 constant : !fir.char<1,13> {
+ %0 = fir.string_lit "./isum_8.f90\00"(13) : !fir.char<1,13>
+ fir.has_value %0 : !fir.char<1,13>
+ }
+}
+
+// CHECK-LABEL: func.func @sum_1d_stride(%{{.*}} -> i32 {
+// CHECK: %[[CI64_2:.*]] = arith.constant 2 : i64
+// CHECK: %[[CINDEX_2:.*]] = fir.convert %[[CI64_2]] : (i64) -> index
+// CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}}
+// CHECK: %[[SLICE:.*]] = fir.slice %{{.*}}, %{{.*}}, %[[CINDEX_2]] : (index, index, index) -> !fir.slice<1>
+// CHECK: %[[A_BOX_I32:.*]] = fir.embox %{{.*}}(%[[SHAPE]]) {{\[}}%[[SLICE]]] : (!fir.ref<!fir.array<20xi32>>, !fir.shape<1>, !fir.slice<1>) -> !fir.box<!fir.array<?xi32>>
+// CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_I32]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK: %{{.*}} = fir.call @_FortranASumInteger4_simplified(%[[A_BOX_NONE]]) : (!fir.box<none>) -> i32
+// CHECK: return %{{.*}} : i32
+// CHECK: }
+
+// CHECK-LABEL: func.func private @_FortranASumInteger4_simplified(%{{.*}}) -> i32 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK: %[[ARR_BOX_I32:.*]] = fir.convert %{{.*}} : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ARR_BOX_I32]], %{{.*}} : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index
+// CHECK: %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[CINDEX_1]] : index
+// CHECK: fir.do_loop %[[ITER:.*]] = %{{.*}} to %[[EXTENT]] step %[[CINDEX_1]] {
+// CHECK: %{{.*}} = fir.coordinate_of %[[ARR_BOX_I32]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK: }
+// CHECK: return %{{.*}} : i32
+// CHECK: }
More information about the flang-commits
mailing list