[flang-commits] [flang] [Flang] Add opt-in affine loop optimization pipeline (PR #191854)

Tue Apr 14 04:33:59 PDT 2026

================
@@ -0,0 +1,638 @@
+//===-- SimplifyDoLoop.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// General-purpose FIR loop canonicalization pass.
+//
+// Transforms fir.do_loop nests into a canonical form suitable for affine
+// promotion and loop optimizations (tiling, fusion, interchange, etc.).
+//
+// The canonical form has:
+//   - No iter_args (shadow induction variable copies removed)
+//   - No memory-based IV tracking inside the loop body
+//   - Final IV values computed and stored after the outermost loop
+//
+// === Design Overview ===
+//
+// Analysis phase (per loop nest):
+//   1. Collect perfectly nested fir.do_loop chain.
+//   2. For each loop, verify iter_arg is a shadow of the induction variable:
+//      - init = fir.convert(lower_bound)
+//      - yield = arith.addi(iter_arg_or_load_of_iv, fir.convert(step))
+//   3. Verify safety conditions:
+//      a. Only one store to IV alloca inside loop (the init store of iter_arg)
+//      b. No function/subroutine calls in the nest
+//      c. IV alloca does not escape (only load/store/declare users)
+//      d. Loop results are only used for final IV stores
+//
+// Transformation phase:
+//   1. For each loop (innermost first):
+//      a. Remove the initial store (fir.store %iter_arg to %iv_alloca)
+//      b. Forward all loads of IV alloca inside loop body to fir.convert(IV)
+//      TODO: forwarding loads of the IV alloca could instead be done by another pass, such as fir-memref-dataflow-opt, if it is available.
+//      c. Strip iter_args and fir.result, rebuild as simple fir.do_loop
+//   2. After the outermost loop, compute and store final IV values
+//      for all loops whose IV is live after the loop (outer to inner order).
+//      Fortran final value: final_iv = lb + ((ub - lb + step) / step) * step
+//      which equals the value of the iter_arg after the last increment.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/Builders.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+
+namespace fir {
+#define GEN_PASS_DEF_SIMPLIFYDOLOOP
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "simplify-do-loop"
+
+using namespace fir;
+using namespace mlir;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// Per-loop bookkeeping built during analysis
+//===----------------------------------------------------------------------===//
+
+struct LoopIVInfo {
+  fir::DoLoopOp loop;              // the fir.do_loop this record describes
+  Value ivAlloca;                  // fir.alloca for this loop's IV
+  SmallVector<Value, 2> ivAliases; // ivAlloca + any fir.declare alias
+  Value lowerBound;                // index-typed lower bound
+  Value upperBound;                // index-typed upper bound
+  Value step;                      // index-typed step
+  Type ivType;                     // Fortran IV type (e.g. i32)
+};
+
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+/// Gather every SSA value that names the memory holding a loop's IV.
+/// `ivRef` may be the storage value itself (expected to be a fir.alloca) or
+/// the result of a fir.declare wrapping it.  The returned list starts with the
+/// underlying storage value, then each fir.declare result among its users.
+static SmallVector<Value, 2> collectAliases(Value ivRef) {
+  // If ivRef comes from a fir.declare, start the scan at that op's memref.
+  Value storage = ivRef;
+  if (auto definingDecl = ivRef.getDefiningOp<fir::DeclareOp>())
+    storage = definingDecl.getMemref();
+  SmallVector<Value, 2> found;
+  found.push_back(storage);
+  for (auto *consumer : storage.getUsers()) {
+    auto declUser = dyn_cast<fir::DeclareOp>(consumer);
+    // Users other than fir.declare do not introduce a new alias.
+    if (!declUser)
+      continue;
+    found.push_back(declUser.getResult());
+  }
+  return found;
+}
+
+/// Collect a perfectly nested chain of fir.do_loop ops starting from `outer`.
----------------
tblah wrote:

This function does not guarantee perfect nesting. It will return any nest of do loops, so long as there is no more than one loop in the body of each.

https://github.com/llvm/llvm-project/pull/191854