[Mlir-commits] [mlir] [mlir][Affine] Fix LICM incorrectly hoisting stores from zero-trip-count loops (PR #189165)
Mehdi Amini
llvmlistbot at llvm.org
Thu Apr 2 08:06:35 PDT 2026
https://github.com/joker-eph updated https://github.com/llvm/llvm-project/pull/189165
>From 36d389e586896ac31a5dfc1c63e393efac2b2cae Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Sat, 28 Mar 2026 19:34:28 -0700
Subject: [PATCH] [MLIR][Affine] Fix LICM incorrectly hoisting stores from
loops that may not execute
The affine-loop-invariant-code-motion pass was hoisting side-effectful
operations (e.g. affine.store) out of loops whose body may never execute.
Two cases were affected:
1. Loops with a statically known zero trip count (e.g. affine.for %i = 0 to 0):
the inner store was unconditionally executed even though the loop never runs.
2. Loops with a dynamic/unknown trip count (e.g. affine.for %i = 0 to %n):
if %n == 0 at runtime the loop body never executes, but the hoisted store
would still run, producing incorrect results.
For example, given:
affine.store %c-58822, %alloc[0]
affine.for %i = 0 to 0 {
affine.store %c821775651, %alloc[0]
}
%r = affine.load %alloc[0]
The pass incorrectly hoisted the inner store before the loop, making %r
return 821775651 instead of -58822.
Fix: only hoist side-effectful ops when getConstantTripCount(forOp) returns
a value strictly greater than zero, i.e., the loop is statically guaranteed
to execute at least once. Pure (side-effect-free) ops may always be hoisted
since they cannot change observable program state.
Fixes #128273
Assisted-by: Claude Code
---
.../AffineLoopInvariantCodeMotion.cpp | 13 +++-
.../affine-loop-invariant-code-motion.mlir | 76 ++++++++++++++++++-
2 files changed, 86 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
index c8d8b8d62e59d..3c55830df61c3 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -12,6 +12,7 @@
#include "mlir/Dialect/Affine/Transforms/Passes.h"
+#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
@@ -31,7 +32,6 @@ using namespace mlir::affine;
namespace {
/// Affine loop invariant code motion (LICM) pass.
-/// TODO: The pass is missing zero tripcount tests.
/// TODO: When compared to the other standard LICM pass, this pass
/// has some special handling for affine read/write ops but such handling
/// requires aliasing to be sound, and as such this pass is unsound. In
@@ -174,6 +174,13 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
SmallVector<Operation *, 8> opsToMove;
SmallPtrSet<Operation *, 8> opsWithUsers;
+ // Only hoist side-effectful ops when the loop is statically known to execute
+ // at least once. For unknown (dynamic) or zero trip counts we cannot prove
+ // the body executes, so hoisting a side-effectful op would change observable
+ // program semantics. Pure (side-effect-free) ops may always be hoisted.
+ auto tripCount = getConstantTripCount(forOp);
+ bool guaranteedToExecute = tripCount.has_value() && *tripCount > 0;
+
for (Operation &op : *forOp.getBody()) {
// Register op in the set of ops that have users. This set is used
// to prevent hoisting ops that depend on these ops that are
@@ -181,6 +188,10 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
if (!op.use_empty())
opsWithUsers.insert(&op);
if (!isa<AffineYieldOp>(op)) {
+ // Do not hoist ops with side effects unless the loop is guaranteed to
+ // execute at least once.
+ if (!guaranteedToExecute && !isPure(&op))
+ continue;
if (isOpLoopInvariant(op, forOp, opsWithUsers, opsToHoist)) {
opsToMove.push_back(&op);
}
diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
index 858b7d3ddf9f1..83313f303dbd1 100644
--- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
+++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
@@ -766,11 +766,13 @@ func.func @affine_parallel(%memref_8: memref<4090x2040xf32>, %x: index) {
}
}
}
+ // The store is inside a loop with an unknown (non-constant) trip count, so
+ // it must not be hoisted: the loop might not execute at runtime.
// CHECK: affine.parallel
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.parallel
- // CHECK-NEXT: affine.store
// CHECK-NEXT: affine.for
+ // CHECK-NEXT: affine.store
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -787,8 +789,8 @@ func.func @affine_parallel(%memref_8: memref<4090x2040xf32>, %x: index) {
// CHECK: scf.parallel
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.parallel
- // CHECK-NEXT: affine.store
// CHECK-NEXT: affine.for
+ // CHECK-NEXT: affine.store
affine.for %arg3 = 0 to 32 {
affine.for %arg4 = 0 to 16 {
@@ -914,3 +916,73 @@ func.func @side_effecting_ops() {
// CHECK: memref.dealloc
return
}
+
+// -----
+
+// Pure (side-effect-free) ops may still be hoisted from zero-trip-count loops
+// since they have no observable effect. The side-effectful op that uses the
+// hoisted result must remain in the loop.
+
+// CHECK-LABEL: func @hoist_pure_from_zero_trip_count_loop
+func.func @hoist_pure_from_zero_trip_count_loop(%x: i32, %y: i32) -> i32 {
+ // CHECK: %[[Z:.*]] = arith.addi %arg0, %arg1
+ // CHECK: affine.for {{.*}} = 0 to 0 {
+ // CHECK-NEXT: affine.store %[[Z]]
+ // CHECK-NEXT: }
+ %alloc = memref.alloc() : memref<1xi32>
+ affine.for %i = 0 to 0 {
+ // arith.addi is pure and loop-invariant — it should be hoisted even though
+ // the loop never executes.
+ %z = arith.addi %x, %y : i32
+ // The store is not pure so it must stay in the loop.
+ affine.store %z, %alloc[0] : memref<1xi32>
+ }
+ %r = affine.load %alloc[0] : memref<1xi32>
+ return %r : i32
+}
+
+// -----
+
+// Side-effectful ops must not be hoisted from zero-trip-count loops, because
+// those loops never execute and hoisting would change observable behavior.
+
+// CHECK-LABEL: func @no_hoist_from_zero_trip_count_loop
+func.func @no_hoist_from_zero_trip_count_loop(%x: i32) -> i32 {
+ %alloc = memref.alloc() : memref<1xi32>
+ %c42 = arith.constant 42 : i32
+ affine.store %c42, %alloc[0] : memref<1xi32>
+ affine.for %i = 0 to 0 {
+ // This store must not be hoisted: the loop body is never executed.
+ affine.store %x, %alloc[0] : memref<1xi32>
+ }
+ // CHECK: affine.store %c42{{.*}}, %alloc[0]
+ // CHECK: affine.for {{.*}} = 0 to 0 {
+ // CHECK-NEXT: affine.store %arg0, %alloc[0]
+ // CHECK-NEXT: }
+ %r = affine.load %alloc[0] : memref<1xi32>
+ return %r : i32
+}
+
+// -----
+
+// Side-effectful ops must not be hoisted from loops with an unknown (dynamic)
+// trip count, because such a loop may not execute at all (e.g. when the upper
+// bound is zero at runtime). Only pure ops may be hoisted.
+
+// CHECK-LABEL: func @unknown_trip_count_store_not_hoisted
+func.func @unknown_trip_count_store_not_hoisted(%x: i32, %n: index) -> i32 {
+ %alloc = memref.alloc() : memref<1xi32>
+ %c42 = arith.constant 42 : i32
+ affine.store %c42, %alloc[0] : memref<1xi32>
+ affine.for %i = 0 to %n {
+ // This store must not be hoisted: if %n == 0 at runtime the loop body
+ // never executes, and hoisting would change the observable result.
+ affine.store %x, %alloc[0] : memref<1xi32>
+ }
+ // CHECK: affine.store %c42{{.*}}, %alloc[0]
+ // CHECK: affine.for {{.*}} = 0 to %{{.*}} {
+ // CHECK-NEXT: affine.store %arg0, %alloc[0]
+ // CHECK-NEXT: }
+ %r = affine.load %alloc[0] : memref<1xi32>
+ return %r : i32
+}
More information about the Mlir-commits
mailing list