[flang-commits] [flang] a716ace - Add loop-versioning pass to improve unit-stride
Mats Petersson via flang-commits
flang-commits at lists.llvm.org
Tue Apr 18 01:53:25 PDT 2023
Author: Mats Petersson
Date: 2023-04-18T09:53:07+01:00
New Revision: a716ace13dca3adc81885670d0f8ff14e866ce97
URL: https://github.com/llvm/llvm-project/commit/a716ace13dca3adc81885670d0f8ff14e866ce97
DIFF: https://github.com/llvm/llvm-project/commit/a716ace13dca3adc81885670d0f8ff14e866ce97.diff
LOG: Add loop-versioning pass to improve unit-stride
Introduce conditional code to identify stride of "one element", and simplify the array accesses for that case.
This allows better loop performance in various benchmarks.
Reviewed By: tblah, kiranchandramohan
Differential Revision: https://reviews.llvm.org/D141306
Added:
flang/lib/Optimizer/Transforms/LoopVersioning.cpp
flang/test/Transforms/loop-versioning.fir
Modified:
flang/include/flang/Optimizer/Transforms/Passes.h
flang/include/flang/Optimizer/Transforms/Passes.td
flang/include/flang/Tools/CLOptions.inc
flang/lib/Optimizer/Transforms/CMakeLists.txt
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 0d3bd325f001..863e0df1e737 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -62,6 +62,7 @@ std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
std::unique_ptr<mlir::Pass> createStackArraysPass();
std::unique_ptr<mlir::Pass> createSimplifyIntrinsicsPass();
std::unique_ptr<mlir::Pass> createAddDebugFoundationPass();
+std::unique_ptr<mlir::Pass> createLoopVersioningPass();
std::unique_ptr<mlir::Pass>
createMemoryAllocationPass(bool dynOnHeap, std::size_t maxStackSize);
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 4ac85c4f829d..1da69b5d57a8 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -294,4 +294,16 @@ def OpenACCDataOperandConversion : Pass<"fir-openacc-data-operand-conversion", "
];
}
+def LoopVersioning : Pass<"loop-versioning", "mlir::func::FuncOp"> {
+ let summary = "Loop Versioning";
+ let description = [{
+ Loop Versioning pass adds a check and two variants of a loop when the input
+ array is an assumed shape array, to optimize for the (often common) case where
+ an array has element sized stride. The element sizes stride allows some
+ loops to be vectorized as well as other loop optimizations.
+ }];
+ let constructor = "::fir::createLoopVersioningPass()";
+ let dependentDialects = [ "fir::FIROpsDialect" ];
+}
+
#endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index cb19d5ddfd9b..037741e18d40 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -200,6 +200,10 @@ inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createSimplifyIntrinsicsPass());
pm.addPass(fir::createAlgebraicSimplificationPass(config));
}
+
+ if (loopVersioning)
+ pm.addPass(fir::createLoopVersioningPass());
+
pm.addPass(mlir::createCSEPass());
if (stackArrays)
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 44a7019a2202..3f63035a9298 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -16,6 +16,7 @@ add_flang_library(FIRTransforms
AddDebugFoundation.cpp
PolymorphicOpConversion.cpp
OpenACC/OpenACCDataOperandConversion.cpp
+ LoopVersioning.cpp
DEPENDS
FIRDialect
diff --git a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
new file mode 100644
index 000000000000..4695d39ee41c
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
@@ -0,0 +1,313 @@
+//===- LoopVersioning.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass looks for loops iterating over assumed-shape arrays, that can
+/// be optimized by "guessing" that the stride is element-sized.
+///
+/// This is done by createing two versions of the same loop: one which assumes
+/// that the elements are contiguous (stride == size of element), and one that
+/// is the original generic loop.
+///
+/// As a side-effect of the assumed element size stride, the array is also
+/// flattened to make it a 1D array - this is because the internal array
+/// structure must be either 1D or have known sizes in all dimensions - and at
+/// least one of the dimensions here is already unknown.
+///
+/// There are two distinct benefits here:
+/// 1. The loop that iterates over the elements is somewhat simplified by the
+/// constant stride calculation.
+/// 2. Since the compiler can understand the size of the stride, it can use
+/// vector instructions, where an unknown (at compile time) stride does often
+/// prevent vector operations from being used.
+///
+/// A known drawback is that the code-size is increased, in some cases that can
+/// be quite substantial - 3-4x is quite plausible (this includes that the loop
+/// gets vectorized, which in itself often more than doubles the size of the
+/// code, because unless the loop size is known, there will be a modulo
+/// vector-size remainder to deal with.
+///
+/// TODO: Do we need some size limit where loops no longer get duplicated?
+// Maybe some sort of cost analysis.
+/// TODO: Should some loop content - for example calls to functions and
+/// subroutines inhibit the versioning of the loops. Plausibly, this
+/// could be part of the cost analysis above.
+//===----------------------------------------------------------------------===//
+
+#include "flang/ISO_Fortran_binding.h"
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Runtime/Inquiry.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/RegionUtils.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+
+namespace fir {
+#define GEN_PASS_DEF_LOOPVERSIONING
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "flang-loop-versioning"
+
+namespace {
+
+class LoopVersioningPass
+ : public fir::impl::LoopVersioningBase<LoopVersioningPass> {
+
+public:
+ void runOnOperation() override;
+};
+
+} // namespace
+
+/// @c replaceOuterUses - replace uses outside of @c op with result of @c
+/// outerOp
+static void replaceOuterUses(mlir::Operation *op, mlir::Operation *outerOp) {
+ const mlir::Operation *outerParent = outerOp->getParentOp();
+ op->replaceUsesWithIf(outerOp, [&](mlir::OpOperand &operand) {
+ mlir::Operation *owner = operand.getOwner();
+ return outerParent == owner->getParentOp();
+ });
+}
+
+static fir::SequenceType getAsSequenceType(mlir::Value *v) {
+ mlir::Type argTy = fir::unwrapPassByRefType(fir::unwrapRefType(v->getType()));
+ return argTy.dyn_cast<fir::SequenceType>();
+}
+
+void LoopVersioningPass::runOnOperation() {
+ LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
+ mlir::func::FuncOp func = getOperation();
+
+ /// @c ArgInfo
+ /// A structure to hold an argument, the size of the argument and dimension
+ /// information.
+ struct ArgInfo {
+ mlir::Value *arg;
+ size_t size;
+ fir::BoxDimsOp dims[CFI_MAX_RANK];
+ };
+
+ // First look for arguments with assumed shape = unknown extent in the lowest
+ // dimension.
+ LLVM_DEBUG(llvm::dbgs() << "Func-name:" << func.getSymName() << "\n");
+ mlir::Block::BlockArgListType args = func.getArguments();
+ mlir::ModuleOp module = func->getParentOfType<mlir::ModuleOp>();
+ fir::KindMapping kindMap = fir::getKindMapping(module);
+ mlir::SmallVector<ArgInfo> argsOfInterest;
+ for (auto &arg : args) {
+ if (auto seqTy = getAsSequenceType(&arg)) {
+ unsigned rank = seqTy.getDimension();
+ // Currently limited to 1D or 2D arrays as that seems to give good
+ // improvement without excessive increase in code-size, etc.
+ if (rank > 0 && rank < 3 &&
+ seqTy.getShape()[0] == fir::SequenceType::getUnknownExtent()) {
+ size_t typeSize = 0;
+ mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(arg.getType());
+ if (elementType.isa<mlir::FloatType>() ||
+ elementType.isa<mlir::IntegerType>())
+ typeSize = elementType.getIntOrFloatBitWidth() / 8;
+ else if (auto cty = elementType.dyn_cast<fir::ComplexType>())
+ typeSize = 2 * cty.getEleType(kindMap).getIntOrFloatBitWidth() / 8;
+ if (typeSize)
+ argsOfInterest.push_back({&arg, typeSize, {}});
+ else
+ LLVM_DEBUG(llvm::dbgs() << "Type not supported\n");
+
+ } else {
+ LLVM_DEBUG(llvm::dbgs() << "Too many dimensions\n");
+ }
+ }
+ }
+
+ if (argsOfInterest.empty())
+ return;
+
+ struct OpsWithArgs {
+ mlir::Operation *op;
+ mlir::SmallVector<ArgInfo> argsAndDims;
+ };
+ // Now see if those arguments are used inside any loop.
+ mlir::SmallVector<OpsWithArgs, 4> loopsOfInterest;
+
+ func.walk([&](fir::DoLoopOp loop) {
+ mlir::Block &body = *loop.getBody();
+ mlir::SmallVector<ArgInfo> argsInLoop;
+ body.walk([&](fir::CoordinateOp op) {
+ // The current operation could be inside another loop than
+ // the one we're currently processing. Skip it, we'll get
+ // to it later.
+ if (op->getParentOfType<fir::DoLoopOp>() != loop)
+ return;
+ const mlir::Value &operand = op->getOperand(0);
+ for (auto a : argsOfInterest) {
+ if (*a.arg == operand) {
+ // Only add if it's not already in the list.
+ if (std::find_if(argsInLoop.begin(), argsInLoop.end(), [&](auto it) {
+ return it.arg == a.arg;
+ }) == argsInLoop.end()) {
+
+ argsInLoop.push_back(a);
+ break;
+ }
+ }
+ }
+ });
+
+ if (!argsInLoop.empty()) {
+ OpsWithArgs ops = {loop, argsInLoop};
+ loopsOfInterest.push_back(ops);
+ }
+ });
+ if (loopsOfInterest.empty())
+ return;
+
+ // If we get here, there are loops to process.
+ fir::FirOpBuilder builder{module, kindMap};
+ mlir::Location loc = builder.getUnknownLoc();
+ mlir::IndexType idxTy = builder.getIndexType();
+
+ LLVM_DEBUG(llvm::dbgs() << "Module Before transformation:");
+ LLVM_DEBUG(module->dump());
+
+ LLVM_DEBUG(llvm::dbgs() << "loopsOfInterest: " << loopsOfInterest.size()
+ << "\n");
+ for (auto op : loopsOfInterest) {
+ LLVM_DEBUG(op.op->dump());
+ builder.setInsertionPoint(op.op);
+
+ mlir::Value allCompares = nullptr;
+ // Ensure all of the arrays are unit-stride.
+ for (auto &arg : op.argsAndDims) {
+
+ fir::SequenceType seqTy = getAsSequenceType(arg.arg);
+ unsigned rank = seqTy.getDimension();
+
+ // We only care about lowest order dimension.
+ for (unsigned i = 0; i < rank; i++) {
+ mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
+ arg.dims[i] = builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy,
+ *arg.arg, dimIdx);
+ }
+ mlir::Value elemSize =
+ builder.createIntegerConstant(loc, idxTy, arg.size);
+ mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
+ loc, mlir::arith::CmpIPredicate::eq, arg.dims[0].getResult(2),
+ elemSize);
+ if (!allCompares) {
+ allCompares = cmp;
+ } else {
+ allCompares =
+ builder.create<mlir::arith::AndIOp>(loc, cmp, allCompares);
+ }
+ }
+
+ auto ifOp =
+ builder.create<fir::IfOp>(loc, op.op->getResultTypes(), allCompares,
+ /*withElse=*/true);
+ builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+
+ LLVM_DEBUG(llvm::dbgs() << "Creating cloned loop\n");
+ mlir::Operation *clonedLoop = op.op->clone();
+ bool changed = false;
+ for (auto &arg : op.argsAndDims) {
+ fir::SequenceType::Shape newShape;
+ newShape.push_back(fir::SequenceType::getUnknownExtent());
+ auto elementType = fir::unwrapSeqOrBoxedSeqType(arg.arg->getType());
+ mlir::Type arrTy = fir::SequenceType::get(newShape, elementType);
+ mlir::Type boxArrTy = fir::BoxType::get(arrTy);
+ mlir::Type refArrTy = builder.getRefType(arrTy);
+ auto carg = builder.create<fir::ConvertOp>(loc, boxArrTy, *arg.arg);
+ auto caddr = builder.create<fir::BoxAddrOp>(loc, refArrTy, carg);
+ auto insPt = builder.saveInsertionPoint();
+ // Use caddr instead of arg.
+ clonedLoop->walk([&](fir::CoordinateOp coop) {
+ // Reduce the multi-dimensioned index to a single index.
+ // This is required becase fir arrays do not support multiple dimensions
+ // with unknown dimensions at compile time.
+ if (coop->getOperand(0) == *arg.arg &&
+ coop->getOperands().size() >= 2) {
+ builder.setInsertionPoint(coop);
+ mlir::Value totalIndex = builder.createIntegerConstant(loc, idxTy, 0);
+ // Operand(1) = array; Operand(2) = index1; Operand(3) = index2
+ for (unsigned i = coop->getOperands().size() - 1; i > 1; i--) {
+ mlir::Value curIndex =
+ builder.createConvert(loc, idxTy, coop->getOperand(i));
+ // First arg is Operand2, so dims[i-2] is 0-based i-1!
+ mlir::Value scale =
+ builder.createConvert(loc, idxTy, arg.dims[i - 2].getResult(1));
+ totalIndex = builder.create<mlir::arith::AddIOp>(
+ loc, totalIndex,
+ builder.create<mlir::arith::MulIOp>(loc, scale, curIndex));
+ }
+ totalIndex = builder.create<mlir::arith::AddIOp>(
+ loc, totalIndex,
+ builder.createConvert(loc, idxTy, coop->getOperand(1)));
+
+ auto newOp = builder.create<fir::CoordinateOp>(
+ loc, builder.getRefType(elementType), caddr,
+ mlir::ValueRange{totalIndex});
+ LLVM_DEBUG(newOp->dump());
+ coop->getResult(0).replaceAllUsesWith(newOp->getResult(0));
+ coop->erase();
+ changed = true;
+ }
+ });
+
+ builder.restoreInsertionPoint(insPt);
+ }
+ assert(changed && "Expected operations to have changed");
+
+ builder.insert(clonedLoop);
+ // Forward the result(s), if any, from the loop operation to the
+ //
+ mlir::ResultRange results = clonedLoop->getResults();
+ bool hasResults = (results.size() > 0);
+ if (hasResults)
+ builder.create<fir::ResultOp>(loc, results);
+
+ // Add the original loop in the else-side of the if operation.
+ builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+ replaceOuterUses(op.op, ifOp);
+ op.op->remove();
+ builder.insert(op.op);
+ // Rely on "cloned loop has results, so original loop also has results".
+ if (hasResults) {
+ builder.create<fir::ResultOp>(loc, op.op->getResults());
+ } else {
+ // Use an assert to check this.
+ assert(op.op->getResults().size() == 0 &&
+ "Weird, the cloned loop doesn't have results, but the original "
+ "does?");
+ }
+ }
+
+ LLVM_DEBUG(llvm::dbgs() << "After transform:\n");
+ LLVM_DEBUG(module->dump());
+
+ LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
+}
+
+std::unique_ptr<mlir::Pass> fir::createLoopVersioningPass() {
+ return std::make_unique<LoopVersioningPass>();
+}
diff --git a/flang/test/Transforms/loop-versioning.fir b/flang/test/Transforms/loop-versioning.fir
new file mode 100644
index 000000000000..f26dbd9bb80e
--- /dev/null
+++ b/flang/test/Transforms/loop-versioning.fir
@@ -0,0 +1,115 @@
+// RUN: fir-opt --loop-versioning %s | FileCheck %s
+
+
+// subroutine sum1d(a, n)
+// real*8 :: a(:)
+// integer :: n
+// real*8 :: sum
+// integer :: i
+// sum = 0
+// do i=1,n
+// sum = sum + a(i)
+// end do
+// end subroutine sum1d
+module {
+ func.func @sum1d(%arg0: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "a"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}) {
+ %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmoduleFsum1dEi"}
+ %1 = fir.alloca f64 {bindc_name = "sum", uniq_name = "_QMmoduleFsum1dEsum"}
+ %cst = arith.constant 0.000000e+00 : f64
+ fir.store %cst to %1 : !fir.ref<f64>
+ %c1_i32 = arith.constant 1 : i32
+ %2 = fir.convert %c1_i32 : (i32) -> index
+ %3 = fir.load %arg1 : !fir.ref<i32>
+ %4 = fir.convert %3 : (i32) -> index
+ %c1 = arith.constant 1 : index
+ %5 = fir.convert %2 : (index) -> i32
+ %6:2 = fir.do_loop %arg2 = %2 to %4 step %c1 iter_args(%arg3 = %5) -> (index, i32) {
+ fir.store %arg3 to %0 : !fir.ref<i32>
+ %7 = fir.load %1 : !fir.ref<f64>
+ %8 = fir.load %0 : !fir.ref<i32>
+ %9 = fir.convert %8 : (i32) -> i64
+ %c1_i64 = arith.constant 1 : i64
+ %10 = arith.subi %9, %c1_i64 : i64
+ %11 = fir.coordinate_of %arg0, %10 : (!fir.box<!fir.array<?xf64>>, i64) -> !fir.ref<f64>
+ %12 = fir.load %11 : !fir.ref<f64>
+ %13 = arith.addf %7, %12 fastmath<contract> : f64
+ fir.store %13 to %1 : !fir.ref<f64>
+ %14 = arith.addi %arg2, %c1 : index
+ %15 = fir.convert %c1 : (index) -> i32
+ %16 = fir.load %0 : !fir.ref<i32>
+ %17 = arith.addi %16, %15 : i32
+ fir.result %14, %17 : index, i32
+ }
+ fir.store %6#1 to %0 : !fir.ref<i32>
+ return
+ }
+
+// Note this only checks the expected transformation, not the entire generated code:
+// CHECK-LABEL: func.func @sum1d(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf64>> {{.*}})
+// CHECK: %[[ZERO:.*]] = arith.constant 0 : index
+// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ARG0]], %[[ZERO]] : {{.*}}
+// CHECK: %[[SIZE:.*]] = arith.constant 8 : index
+// CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[DIMS]]#2, %[[SIZE]]
+// CHECK: %[[IF_RES:.*]]:2 = fir.if %[[CMP]] -> {{.*}}
+// CHECK: %[[NEWARR:.*]] = fir.convert %[[ARG0]]
+// CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[NEWARR]] : {{.*}} -> !fir.ref<!fir.array<?xf64>>
+// CHECK: %[[LOOP_RES:.*]]:2 = fir.do_loop {{.*}}
+// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %{{.*}} : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>
+// CHECK: fir.result %{{.*}}, %{{.*}}
+// CHECK: }
+// CHECK fir.result %[[LOOP_RES]]#0, %[[LOOP_RES]]#1
+// CHECK: } else {
+// CHECK: %[[LOOP_RES2:.*]]:2 = fir.do_loop {{.*}}
+// CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.box<!fir.array<?xf64>>, i64) -> !fir.ref<f64>
+// CHECK: %{{.*}}= fir.load %[[COORD2]] : !fir.ref<f64>
+// CHECK: fir.result %{{.*}}, %{{.*}}
+// CHECK: }
+// CHECK fir.result %[[LOOP_RES2]]#0, %[[LOOP_RES2]]#1
+// CHECK: }
+// CHECK: fir.store %[[IF_RES]]#1 to %{{.*}}
+// CHECK: return
+
+// -----
+
+// Test that loop-versioning pass doesn't expand known size arrays.
+func.func @sum1dfixed(%arg0: !fir.ref<!fir.array<?xf64>> {fir.bindc_name = "a"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}) {
+ %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsum1dfixedEi"}
+ %1 = fir.alloca f64 {bindc_name = "sum", uniq_name = "_QFsum1dfixedEsum"}
+ %cst = arith.constant 0.000000e+00 : f64
+ fir.store %cst to %1 : !fir.ref<f64>
+ %c1_i32 = arith.constant 1 : i32
+ %2 = fir.convert %c1_i32 : (i32) -> index
+ %3 = fir.load %arg1 : !fir.ref<i32>
+ %4 = fir.convert %3 : (i32) -> index
+ %c1 = arith.constant 1 : index
+ %5 = fir.convert %2 : (index) -> i32
+ %6:2 = fir.do_loop %arg2 = %2 to %4 step %c1 iter_args(%arg3 = %5) -> (index, i32) {
+ fir.store %arg3 to %0 : !fir.ref<i32>
+ %7 = fir.load %1 : !fir.ref<f64>
+ %8 = fir.load %0 : !fir.ref<i32>
+ %9 = fir.convert %8 : (i32) -> i64
+ %c1_i64 = arith.constant 1 : i64
+ %10 = arith.subi %9, %c1_i64 : i64
+ %11 = fir.coordinate_of %arg0, %10 : (!fir.ref<!fir.array<?xf64>>, i64) -> !fir.ref<f64>
+ %12 = fir.load %11 : !fir.ref<f64>
+ %13 = arith.addf %7, %12 fastmath<contract> : f64
+ fir.store %13 to %1 : !fir.ref<f64>
+ %14 = arith.addi %arg2, %c1 : index
+ %15 = fir.convert %c1 : (index) -> i32
+ %16 = fir.load %0 : !fir.ref<i32>
+ %17 = arith.addi %16, %15 : i32
+ fir.result %14, %17 : index, i32
+ }
+ fir.store %6#1 to %0 : !fir.ref<i32>
+ return
+ }
+
+// CHECK-LABEL: func.func @sum1dfixed(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf64>> {{.*}})
+// CHECK: fir.do_loop {{.*}}
+// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[ARG0]], {{.*}}
+// CHECK: %{{.*}} = fir.load %[[COORD]]
+
+} // End module
More information about the flang-commits
mailing list