[flang-commits] [flang] 3098b4d - [flang] Added LICM hoisting for nested regions. (#190696)
via flang-commits
flang-commits at lists.llvm.org
Tue Apr 7 11:41:05 PDT 2026
Author: Slava Zakharin
Date: 2026-04-07T11:41:00-07:00
New Revision: 3098b4da81c254b40f6bc931bbb1933df12dc0a9
URL: https://github.com/llvm/llvm-project/commit/3098b4da81c254b40f6bc931bbb1933df12dc0a9
DIFF: https://github.com/llvm/llvm-project/commit/3098b4da81c254b40f6bc931bbb1933df12dc0a9.diff
LOG: [flang] Added LICM hoisting for nested regions. (#190696)
This patch adds a couple of experimental LICM modes
that allow hoisting operations from regions nested
inside a loop, e.g. when there is `fir.if` inside
`fir.do_loop`. The aggressive mode hoists all operations
that are safe to hoist. The cheap mode hoists only
"cheap" operations (currently, only `fir.convert`),
though the definition of "cheap" needs to be worked out.
Added:
Modified:
flang/include/flang/Optimizer/Transforms/Passes.h
flang/include/flang/Optimizer/Transforms/Passes.td
flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp
flang/test/Transforms/licm.fir
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 50e8e6c58bf62..adacd3cc0cf51 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -28,6 +28,14 @@ class ModuleOp;
namespace fir {
+/// Controls hoisting of invariant ops from nested regions (e.g. scf.if
+/// within loops) in the flang-licm pass.
+enum class LICMNestedHoistingMode {
+ None, ///< Do not hoist from nested regions.
+ Cheap, ///< Only hoist cheap ops like fir.convert.
+ Aggressive, ///< Hoist all safe invariant ops.
+};
+
//===----------------------------------------------------------------------===//
// Passes defined in Passes.td
//===----------------------------------------------------------------------===//
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 5c7920ce3fa62..71c9f7b62d2be 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -662,6 +662,19 @@ def LoopInvariantCodeMotion : Pass<"flang-licm", "::mlir::func::FuncOp"> {
The pass only moves existing operations, so there are no dependent
dialects.
}];
+ let options = [Option<"hoistFromNestedRegions", "hoist-from-nested-regions",
+ "::fir::LICMNestedHoistingMode",
+ /*default=*/"::fir::LICMNestedHoistingMode::Cheap",
+ "Control hoisting of invariant ops from nested regions "
+ "(e.g. scf.if within loops)",
+ [{::llvm::cl::values(
+ clEnumValN(::fir::LICMNestedHoistingMode::None,
+ "none", "Do not hoist from nested regions"),
+ clEnumValN(::fir::LICMNestedHoistingMode::Cheap,
+ "cheap", "Only hoist cheap ops like fir.convert"),
+ clEnumValN(::fir::LICMNestedHoistingMode::Aggressive,
+ "aggressive", "Hoist all safe invariant ops")
+ )}]>];
}
#endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
diff --git a/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp b/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp
index 8ebb8982936e8..d1c4046f38b19 100644
--- a/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp
+++ b/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp
@@ -18,6 +18,7 @@
#include "flang/Optimizer/Dialect/FortranVariableInterface.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopInvariantCodeMotionUtils.h"
#include "llvm/ADT/TypeSwitch.h"
@@ -48,6 +49,7 @@ using namespace mlir;
/// (see isSafeToHoistLoad() comments below).
struct LoopInvariantCodeMotion
: fir::impl::LoopInvariantCodeMotionBase<LoopInvariantCodeMotion> {
+ using LoopInvariantCodeMotionBase::LoopInvariantCodeMotionBase;
void runOnOperation() override;
};
@@ -162,26 +164,37 @@ static bool isNonOptionalScalar(Value location) {
/// Returns true iff it is safe to hoist the given load-like operation 'op',
/// which access given memory 'locations', out of the operation 'loopLike'.
/// The current safety conditions are:
-/// * The loop runs at least one iteration, OR
+/// * The load is known to be unconditionally executed in the loop and the
+/// loop runs at least one iteration, OR
/// * all the accessed locations are inside scalar non-OPTIONAL
/// Fortran objects (Fortran descriptors are considered to be scalars).
+///
+/// When \p maybeConditionallyExecuted is true, the load may be inside a
+/// conditional region (e.g. scf.if) within the loop, so the trip count
+/// shortcut cannot be used: even if the loop runs, the condition might never
+/// be true and the load might access an invalid location.
+/// TODO: analyze the parent operation to determine whether it truly
+/// conditionally executes its body (scf.execute_region always executes it).
static bool isSafeToHoistLoad(Operation *op, ArrayRef<Value> locations,
LoopLikeOpInterface loopLike,
- AliasAnalysis &aliasAnalysis) {
+ AliasAnalysis &aliasAnalysis,
+ bool maybeConditionallyExecuted) {
for (Value location : locations)
- if (aliasAnalysis.getModRef(loopLike.getOperation(), location)
- .isModAndRef()) {
+ if (aliasAnalysis.getModRef(loopLike.getOperation(), location).isMod()) {
LDBG() << "Failure: reads location:\n"
<< location << "\nwhich is modified inside the loop";
return false;
}
// Check that it is safe to read from all the locations before the loop.
- std::optional<llvm::APInt> tripCount = loopLike.getStaticTripCount();
- if (tripCount && !tripCount->isZero()) {
- // Loop executes at least one iteration, so it is safe to hoist.
- LDBG() << "Success: loop has non-zero iterations";
- return true;
+ if (!maybeConditionallyExecuted) {
+ std::optional<llvm::APInt> tripCount = loopLike.getStaticTripCount();
+ if (tripCount && !tripCount->isZero()) {
+ // Loop executes at least one iteration and the load is unconditionally
+ // executed in the loop body, so it is safe to hoist.
+ LDBG() << "Success: loop has non-zero iterations";
+ return true;
+ }
}
// Check whether the access must always be valid.
@@ -193,8 +206,10 @@ static bool isSafeToHoistLoad(Operation *op, ArrayRef<Value> locations,
/// Returns true iff the given 'op' is a load-like operation,
/// and it can be hoisted out of 'loopLike' operation.
+/// See isSafeToHoistLoad for the meaning of \p maybeConditionallyExecuted.
static bool canHoistLoad(Operation *op, LoopLikeOpInterface loopLike,
- AliasAnalysis &aliasAnalysis) {
+ AliasAnalysis &aliasAnalysis,
+ bool maybeConditionallyExecuted) {
LDBG() << "Checking operation:\n" << *op;
if (auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op)) {
SmallVector<MemoryEffects::EffectInstance> effects;
@@ -216,12 +231,29 @@ static bool canHoistLoad(Operation *op, LoopLikeOpInterface loopLike,
locations.insert(location);
}
return isSafeToHoistLoad(op, locations.getArrayRef(), loopLike,
- aliasAnalysis);
+ aliasAnalysis, maybeConditionallyExecuted);
}
LDBG() << "Failure: has unknown effects";
return false;
}
+/// Recursively collect regions from operations inside \p region, skipping
+/// IsolatedFromAbove operations (whose regions form a separate scope) and
+/// LoopLikeOpInterface operations (which have their own LICM invocation).
+static void collectNestedRegions(Region &region,
+ SmallVectorImpl<Region *> &result) {
+ for (Operation &op : region.getOps()) {
+ if (op.hasTrait<OpTrait::IsIsolatedFromAbove>())
+ continue;
+ if (isa<LoopLikeOpInterface>(&op))
+ continue;
+ for (Region &nested : op.getRegions()) {
+ result.push_back(&nested);
+ collectNestedRegions(nested, result);
+ }
+ }
+}
+
void LoopInvariantCodeMotion::runOnOperation() {
if (disableFlangLICM) {
LDBG() << "Skipping [HL]FIR LoopInvariantCodeMotion()";
@@ -233,8 +265,9 @@ void LoopInvariantCodeMotion::runOnOperation() {
auto &aliasAnalysis = getAnalysis<AliasAnalysis>();
aliasAnalysis.addAnalysisImplementation(fir::AliasAnalysis{});
- std::function<bool(Operation *, LoopLikeOpInterface loopLike)>
- shouldMoveOutOfLoop = [&](Operation *op, LoopLikeOpInterface loopLike) {
+ std::function<bool(Operation *, LoopLikeOpInterface, bool)>
+ shouldMoveOutOfLoop = [&](Operation *op, LoopLikeOpInterface loopLike,
+ bool maybeConditionallyExecuted) {
if (isPure(op)) {
LDBG() << "Pure operation: " << *op;
return true;
@@ -255,7 +288,8 @@ void LoopInvariantCodeMotion::runOnOperation() {
nestedOps.push_back(&nestedOp);
bool result = llvm::all_of(nestedOps, [&](Operation *nestedOp) {
- return shouldMoveOutOfLoop(nestedOp, loopLike);
+ return shouldMoveOutOfLoop(nestedOp, loopLike,
+ maybeConditionallyExecuted);
});
LDBG() << "Recursive operation can" << (result ? "" : "not")
<< " be hoisted";
@@ -268,7 +302,8 @@ void LoopInvariantCodeMotion::runOnOperation() {
return result;
}
}
- return canHoistLoad(op, loopLike, aliasAnalysis);
+ return canHoistLoad(op, loopLike, aliasAnalysis,
+ maybeConditionallyExecuted);
};
getOperation()->walk([&](LoopLikeOpInterface loopLike) {
@@ -297,26 +332,77 @@ void LoopInvariantCodeMotion::runOnOperation() {
});
return;
}
+ auto isDefinedOutsideRegion = [&](Value value, Region *) {
+ return loopLike.isDefinedOutsideOfLoop(value);
+ };
+ auto canMoveOutOfLoop = [&](Operation *op) {
+ if (!fir::canMoveOutOf(loopLike, op)) {
+ LDBG() << "Cannot hoist " << *op << " out of the loop";
+ return false;
+ }
+ if (!fir::canMoveFromDescendant(parentOp, loopLike, op)) {
+ LDBG() << "Cannot hoist " << *op << " into the parent of the loop";
+ return false;
+ }
+ return true;
+ };
+ auto moveOutOfRegion = [&](Operation *op, Region *) {
+ loopLike.moveOutOfLoop(op);
+ };
+
moveLoopInvariantCode(
- loopLike.getLoopRegions(),
- /*isDefinedOutsideRegion=*/
- [&](Value value, Region *) {
- return loopLike.isDefinedOutsideOfLoop(value);
- },
+ loopLike.getLoopRegions(), isDefinedOutsideRegion,
/*shouldMoveOutOfRegion=*/
[&](Operation *op, Region *) {
- if (!fir::canMoveOutOf(loopLike, op)) {
- LDBG() << "Cannot hoist " << *op << " out of the loop";
- return false;
- }
- if (!fir::canMoveFromDescendant(parentOp, loopLike, op)) {
- LDBG() << "Cannot hoist " << *op << " into the parent of the loop";
- return false;
- }
- return shouldMoveOutOfLoop(op, loopLike);
+ return canMoveOutOfLoop(op) &&
+ shouldMoveOutOfLoop(op, loopLike,
+ /*maybeConditionallyExecuted=*/false);
},
- /*moveOutOfRegion=*/
- [&](Operation *op, Region *) { loopLike.moveOutOfLoop(op); });
+ moveOutOfRegion);
+
+ if (hoistFromNestedRegions == fir::LICMNestedHoistingMode::None)
+ return;
+
+ // Hoist loop-invariant ops from nested regions (e.g., fir.convert
+ // inside scf.if) out of the loop. This enables CSE to deduplicate
+ // converted memrefs, which improves alias analysis for parallelization.
+ // The callbacks close over loopLike (ignoring the Region* parameter),
+ // so invariance and movement are evaluated against the loop, not the
+ // nested region.
+ // Loads hoisted from nested regions are treated as maybe-conditionally
+ // executed: we do not know whether the parent operation always executes
+ // its body (e.g. scf.execute_region does, scf.if might not), so the
+ // trip count shortcut cannot prove safety.
+ // TODO: analyze the parent operation to determine whether it truly
+ // conditionally executes its body.
+ SmallVector<Region *> nestedRegions;
+ for (Region *loopRegion : loopLike.getLoopRegions())
+ collectNestedRegions(*loopRegion, nestedRegions);
+
+ if (nestedRegions.empty())
+ return;
+
+ auto shouldMoveFromNestedRegion = [&](Operation *op, Region *) {
+ return canMoveOutOfLoop(op) &&
+ shouldMoveOutOfLoop(op, loopLike,
+ /*maybeConditionallyExecuted=*/true);
+ };
+ if (hoistFromNestedRegions == fir::LICMNestedHoistingMode::Aggressive) {
+ moveLoopInvariantCode(nestedRegions, isDefinedOutsideRegion,
+ shouldMoveFromNestedRegion, moveOutOfRegion);
+ } else {
+ // "cheap" mode: only hoist fir.convert.
+ // TODO: refine the cost model for "cheap" hoisting to include
+ // other inexpensive operations.
+ moveLoopInvariantCode(
+ nestedRegions, isDefinedOutsideRegion,
+ /*shouldMoveOutOfRegion=*/
+ [&](Operation *op, Region *region) {
+ return isa<fir::ConvertOp>(op) &&
+ shouldMoveFromNestedRegion(op, region);
+ },
+ moveOutOfRegion);
+ }
});
LDBG() << "Exit [HL]FIR LoopInvariantCodeMotion()";
diff --git a/flang/test/Transforms/licm.fir b/flang/test/Transforms/licm.fir
index 049d86b6171fe..3490c64a67222 100644
--- a/flang/test/Transforms/licm.fir
+++ b/flang/test/Transforms/licm.fir
@@ -2087,3 +2087,344 @@ func.func @test_() {
}
return
}
+
+// -----
+// Test hoisting of fir.convert from nested scf.if regions inside a loop.
+// The two fir.convert ops inside separate scf.if blocks should be hoisted
+// before the scf.for, enabling downstream CSE to merge them.
+// CHECK-LABEL: func.func @test_nested_hoist(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK-DAG: %[[CST1:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK-DAG: %[[CST2:.*]] = arith.constant 2.000000e+00 : f32
+// CHECK: %[[CONV1:.*]] = fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: %[[CONV2:.*]] = fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: scf.for %[[I:.*]] =
+// CHECK: scf.if
+// CHECK-NOT: fir.convert
+// CHECK: memref.store %[[CST1]], %[[CONV1]][%[[I]]] : memref<10xf32>
+// CHECK: scf.if
+// CHECK-NOT: fir.convert
+// CHECK: memref.store %[[CST2]], %[[CONV2]][%[[I]]] : memref<10xf32>
+// CHECK: return
+func.func @test_nested_hoist(%arg0: !fir.ref<!fir.array<10xf32>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c5 = arith.constant 5 : index
+ %c10 = arith.constant 10 : index
+ %cst1 = arith.constant 1.000000e+00 : f32
+ %cst2 = arith.constant 2.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ %cmp1 = arith.cmpi slt, %i, %c5 : index
+ scf.if %cmp1 {
+ %mem = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ memref.store %cst1, %mem[%i] : memref<10xf32>
+ }
+ %cmp2 = arith.cmpi sge, %i, %c5 : index
+ scf.if %cmp2 {
+ %mem = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ memref.store %cst2, %mem[%i] : memref<10xf32>
+ }
+ }
+ return
+}
+
+// -----
+// Test hoisting of fir.convert from deeply nested scf.if (scf.if inside
+// scf.if inside scf.for). The fir.convert should be hoisted before the loop.
+// CHECK-LABEL: func.func @test_deeply_nested_hoist(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK: %[[CONV:.*]] = fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: scf.for %[[I:.*]] =
+// CHECK: scf.if
+// CHECK: scf.if
+// CHECK-NOT: fir.convert
+// CHECK: memref.store %{{.*}}, %[[CONV]][%[[I]]] : memref<10xf32>
+// CHECK: return
+func.func @test_deeply_nested_hoist(%arg0: !fir.ref<!fir.array<10xf32>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c5 = arith.constant 5 : index
+ %c10 = arith.constant 10 : index
+ %cst = arith.constant 1.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ %cmp1 = arith.cmpi sgt, %i, %c3 : index
+ scf.if %cmp1 {
+ %cmp2 = arith.cmpi slt, %i, %c5 : index
+ scf.if %cmp2 {
+ %mem = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ memref.store %cst, %mem[%i] : memref<10xf32>
+ }
+ }
+ }
+ return
+}
+
+// -----
+// Test that fir.convert fully invariant w.r.t. both loops is hoisted all the
+// way out (inner loop top-level LICM hoists it between the loops, then outer
+// loop top-level LICM hoists it before both loops).
+// CHECK-LABEL: func.func @test_nested_loop_fully_invariant(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>>)
+// CHECK: %[[CONV:.*]] = fir.convert %[[ARG0]] : (!fir.ref<!fir.array<100xf32>>) -> memref<100xf32>
+// CHECK: scf.for
+// CHECK: scf.for %[[J:.*]] =
+// CHECK-NOT: fir.convert
+// CHECK: memref.store %{{.*}}, %[[CONV]][%[[J]]] : memref<100xf32>
+// CHECK: return
+func.func @test_nested_loop_fully_invariant(%arg0: !fir.ref<!fir.array<100xf32>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c10 = arith.constant 10 : index
+ %cst = arith.constant 1.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ scf.for %j = %c0 to %c10 step %c1 {
+ %mem = fir.convert %arg0 : (!fir.ref<!fir.array<100xf32>>) -> memref<100xf32>
+ memref.store %cst, %mem[%j] : memref<100xf32>
+ }
+ }
+ return
+}
+
+// -----
+// Test that fir.convert inside scf.if inside nested loops is hoisted all the
+// way out when it is fully invariant (nested LICM on inner loop hoists from
+// scf.if, then top-level LICM on each loop hoists further).
+// CHECK-LABEL: func.func @test_nested_loop_with_if_fully_invariant(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>>)
+// CHECK: %[[CONV:.*]] = fir.convert %[[ARG0]] : (!fir.ref<!fir.array<100xf32>>) -> memref<100xf32>
+// CHECK: scf.for
+// CHECK: scf.for %[[J:.*]] =
+// CHECK: scf.if
+// CHECK-NOT: fir.convert
+// CHECK: memref.store %{{.*}}, %[[CONV]][%[[J]]] : memref<100xf32>
+// CHECK: return
+func.func @test_nested_loop_with_if_fully_invariant(%arg0: !fir.ref<!fir.array<100xf32>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c5 = arith.constant 5 : index
+ %c10 = arith.constant 10 : index
+ %cst = arith.constant 1.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ scf.for %j = %c0 to %c10 step %c1 {
+ %cmp = arith.cmpi slt, %j, %c5 : index
+ scf.if %cmp {
+ %mem = fir.convert %arg0 : (!fir.ref<!fir.array<100xf32>>) -> memref<100xf32>
+ memref.store %cst, %mem[%j] : memref<100xf32>
+ }
+ }
+ }
+ return
+}
+
+// -----
+// Test that fir.convert using an outer-loop-variant value is hoisted from
+// scf.if to between the two loops (by inner loop's nested LICM), but NOT
+// further (because its operand %ptr depends on the outer loop induction var).
+// CHECK-LABEL: func.func @test_nested_loop_outer_variant(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>>)
+// CHECK: scf.for %[[I:.*]] =
+// CHECK: %[[PTR:.*]] = fir.coordinate_of %[[ARG0]], %[[I]]
+// CHECK: %[[CONV:.*]] = fir.convert %[[PTR]] : (!fir.ref<f32>) -> memref<f32>
+// CHECK: scf.for %[[J:.*]] =
+// CHECK: scf.if
+// CHECK-NOT: fir.convert
+// CHECK: memref.store %{{.*}}, %[[CONV]][] : memref<f32>
+// CHECK: return
+func.func @test_nested_loop_outer_variant(%arg0: !fir.ref<!fir.array<100xf32>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c5 = arith.constant 5 : index
+ %c10 = arith.constant 10 : index
+ %cst = arith.constant 1.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ %ptr = fir.coordinate_of %arg0, %i : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+ scf.for %j = %c0 to %c10 step %c1 {
+ %cmp = arith.cmpi slt, %j, %c5 : index
+ scf.if %cmp {
+ %mem = fir.convert %ptr : (!fir.ref<f32>) -> memref<f32>
+ memref.store %cst, %mem[] : memref<f32>
+ }
+ }
+ }
+ return
+}
+
+// -----
+// Test chained fir.convert ops across nested loop and scf.if boundaries.
+// %conv1 is inside the outer scf.if, %conv2 and %conv3 use the chain and are
+// inside scf.if inside an inner loop inside the outer scf.if.
+// All three should be hoisted before both loops via user-propagation:
+// 1. Inner loop nested LICM hoists %conv2, %conv3 from inner scf.if.
+// 2. Inner loop top-level LICM hoists them to the outer scf.if body.
+// 3. Outer loop nested LICM hoists %conv1 from outer scf.if; user
+// propagation then hoists %conv2, %conv3 too.
+// CHECK-LABEL: func.func @test_chained_converts(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK: %[[C1:.*]] = fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: %[[C2:.*]] = fir.convert %[[C1]] : (memref<10xf32>) -> !fir.ref<!fir.array<10xf32>>
+// CHECK: %[[C3:.*]] = fir.convert %[[C2]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: scf.for %{{.*}} =
+// CHECK: scf.if
+// CHECK: scf.for %[[J:.*]] =
+// CHECK: scf.if
+// CHECK-NOT: fir.convert
+// CHECK: memref.store %{{.*}}, %[[C3]][%[[J]]] : memref<10xf32>
+// CHECK: return
+func.func @test_chained_converts(%arg0: !fir.ref<!fir.array<10xf32>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c5 = arith.constant 5 : index
+ %c10 = arith.constant 10 : index
+ %cst = arith.constant 1.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ %cond1 = arith.cmpi slt, %i, %c5 : index
+ scf.if %cond1 {
+ %conv1 = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ scf.for %j = %c0 to %c10 step %c1 {
+ %cond2 = arith.cmpi slt, %j, %c5 : index
+ scf.if %cond2 {
+ %conv2 = fir.convert %conv1 : (memref<10xf32>) -> !fir.ref<!fir.array<10xf32>>
+ %conv3 = fir.convert %conv2 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ memref.store %cst, %conv3[%j] : memref<10xf32>
+ }
+ }
+ }
+ }
+ return
+}
+
+// -----
+// Test chained fir.convert ops across deeply nested scf.if regions within
+// a single loop. %conv1 feeds into both branches: one scf.if uses a chain
+// of two converts, the other has a further-nested scf.if with a three-deep
+// chain. All fir.convert ops should be hoisted before the loop.
+// CHECK-LABEL: func.func @test_chained_deep_ifs(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK-DAG: %[[CST1:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK-DAG: %[[CST2:.*]] = arith.constant 2.000000e+00 : f32
+// CHECK: %[[C1:.*]] = fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: %[[C2A:.*]] = fir.convert %[[C1]] : (memref<10xf32>) -> !fir.ref<!fir.array<10xf32>>
+// CHECK: %[[C3A:.*]] = fir.convert %[[C2A]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: %[[C2B:.*]] = fir.convert %[[C1]] : (memref<10xf32>) -> !fir.ref<!fir.array<10xf32>>
+// CHECK: %[[C3B:.*]] = fir.convert %[[C2B]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: scf.for %[[I:.*]] =
+// CHECK: scf.if
+// CHECK: scf.if
+// CHECK-NOT: fir.convert
+// CHECK: memref.store %[[CST1]], %[[C3A]][%[[I]]] : memref<10xf32>
+// CHECK: scf.if
+// CHECK: scf.if
+// CHECK-NOT: fir.convert
+// CHECK: memref.store %[[CST2]], %[[C3B]][%[[I]]] : memref<10xf32>
+// CHECK: return
+func.func @test_chained_deep_ifs(%arg0: !fir.ref<!fir.array<10xf32>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c5 = arith.constant 5 : index
+ %c10 = arith.constant 10 : index
+ %cst1 = arith.constant 1.000000e+00 : f32
+ %cst2 = arith.constant 2.000000e+00 : f32
+ scf.for %i = %c0 to %c10 step %c1 {
+ %cond1 = arith.cmpi slt, %i, %c5 : index
+ scf.if %cond1 {
+ %conv1 = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ %cond2 = arith.cmpi slt, %i, %c3 : index
+ scf.if %cond2 {
+ %conv2a = fir.convert %conv1 : (memref<10xf32>) -> !fir.ref<!fir.array<10xf32>>
+ %conv3a = fir.convert %conv2a : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ memref.store %cst1, %conv3a[%i] : memref<10xf32>
+ }
+ %cond3 = arith.cmpi sge, %i, %c3 : index
+ scf.if %cond3 {
+ %conv2b = fir.convert %conv1 : (memref<10xf32>) -> !fir.ref<!fir.array<10xf32>>
+ %cond4 = arith.cmpi slt, %i, %c5 : index
+ scf.if %cond4 {
+ %conv3b = fir.convert %conv2b : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ memref.store %cst2, %conv3b[%i] : memref<10xf32>
+ }
+ }
+ }
+ }
+ return
+}
+
+// -----
+// Test that canMoveOutOf prevents nested hoisting of fir.convert when its
+// operand is a data operand (private variable) of acc.loop, while
+// fir.convert of a non-data operand is still hoisted.
+// CHECK-LABEL: func.func @test_acc_loop_nested_canMoveOutOf(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK: acc.parallel combined(loop) {
+// CHECK: %[[PRIV:.*]] = acc.private
+// The non-private fir.convert IS hoisted out of acc.loop:
+// CHECK: %[[CVT_ARG:.*]] = fir.convert %[[ARG0]] : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+// CHECK: acc.loop{{.*}}private(%[[PRIV]] : !fir.ref<f32>)
+// CHECK: scf.if
+// The private fir.convert is NOT hoisted (canMoveOutOf blocks it):
+// CHECK: fir.convert %[[PRIV]] : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK: memref.store %{{.*}}, %[[CVT_ARG]]
+func.func @test_acc_loop_nested_canMoveOutOf(%arg0: !fir.ref<!fir.array<10xf32>>) {
+ %cst = arith.constant 1.000000e+00 : f32
+ %c10_i32 = arith.constant 10 : i32
+ %c5_i32 = arith.constant 5 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca f32 {bindc_name = "b", uniq_name = "_QFtestEb"}
+ %2 = fir.declare %1 {uniq_name = "_QFtestEb"} : (!fir.ref<f32>) -> !fir.ref<f32>
+ acc.parallel combined(loop) {
+ %priv = acc.private varPtr(%2 : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "b"}
+ acc.loop combined(parallel) private(%priv : !fir.ref<f32>) control(%arg1 : i32) = (%c1_i32 : i32) to (%c10_i32 : i32) step (%c1_i32 : i32) {
+ %cond = arith.cmpi slt, %arg1, %c5_i32 : i32
+ scf.if %cond {
+ %cvt_priv = fir.convert %priv : (!fir.ref<f32>) -> !fir.ref<f32>
+ fir.store %cst to %cvt_priv : !fir.ref<f32>
+ %cvt_arg = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ %idx = fir.convert %arg1 : (i32) -> index
+ memref.store %cst, %cvt_arg[%idx] : memref<10xf32>
+ }
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.yield
+ }
+ return
+}
+
+// -----
+// Test that canMoveFromDescendant prevents nested hoisting when the parent
+// of the loop is an omp.wsloop (LoopWrapperInterface), which disallows
+// moving operations from its descendants into it.
+// The fir.convert of %arg0 inside scf.if is loop-invariant but must NOT
+// be hoisted because canMoveFromDescendant(omp.wsloop, omp.loop_nest, ...)
+// returns false.
+// CHECK-LABEL: func.func @test_omp_wsloop_nested_canMoveFromDescendant(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10xf32>>)
+// CHECK: omp.parallel
+// CHECK-NEXT: omp.wsloop
+// CHECK-NEXT: omp.loop_nest
+// CHECK: scf.if
+// CHECK: fir.convert %[[ARG0]]
+func.func @test_omp_wsloop_nested_canMoveFromDescendant(%arg0: !fir.ref<!fir.array<10xf32>>) {
+ %cst = arith.constant 1.000000e+00 : f32
+ %c10_i32 = arith.constant 10 : i32
+ %c5_i32 = arith.constant 5 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %alloca = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtestEi"}
+ %decl_i = fir.declare %alloca {uniq_name = "_QFtestEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ omp.parallel {
+ omp.wsloop private(@_QFtestEi_private_i32 %decl_i -> %arg1 : !fir.ref<i32>) {
+ omp.loop_nest (%arg2) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+ %cond = arith.cmpi slt, %arg2, %c5_i32 : i32
+ scf.if %cond {
+ %cvt = fir.convert %arg0 : (!fir.ref<!fir.array<10xf32>>) -> memref<10xf32>
+ %idx = fir.convert %arg2 : (i32) -> index
+ memref.store %cst, %cvt[%idx] : memref<10xf32>
+ }
+ omp.yield
+ }
+ }
+ omp.terminator
+ }
+ return
+}
More information about the flang-commits
mailing list