[Mlir-commits] [mlir] [mlir][Affine] Fix LICM incorrectly hoisting stores from zero-trip-count loops (PR #189165)

Sat Mar 28 05:49:32 PDT 2026

https://github.com/joker-eph created https://github.com/llvm/llvm-project/pull/189165

The affine-loop-invariant-code-motion pass was hoisting side-effectful operations (e.g. affine.store) out of loops whose trip count is statically known to be zero. This caused stores to execute unconditionally even though the loop body should never run, producing incorrect results.

The fix skips hoisting of non-memory-effect-free ops when getConstantTripCount returns 0. Pure/side-effect-free ops are still eligible for hoisting because they cannot change observable program state.

Fixes #128273

Assisted-by: Claude Code

>From 63c77532765b75f2d2bf9bd024debe6716254636 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Fri, 27 Mar 2026 12:09:16 -0700
Subject: [PATCH] [mlir][Affine] Fix LICM incorrectly hoisting stores from
 zero-trip-count loops

The affine-loop-invariant-code-motion pass was hoisting side-effectful
operations (e.g. affine.store) out of loops whose trip count is
statically known to be zero. This caused stores to execute unconditionally
even though the loop body should never run, producing incorrect results.

The fix skips hoisting of non-memory-effect-free ops when
getConstantTripCount returns 0. Pure/side-effect-free ops are still
eligible for hoisting because they cannot change observable program state.

Fixes #128273

Assisted-by: Claude Code
---
 .../AffineLoopInvariantCodeMotion.cpp         | 11 ++-
 .../affine-loop-invariant-code-motion.mlir    | 85 +++++++++++++++++++
 2 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
index c8d8b8d62e59d..a7b582a30c918 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Dialect/Affine/Transforms/Passes.h"
 
+#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
@@ -31,7 +32,6 @@ using namespace mlir::affine;
 namespace {
 
 /// Affine loop invariant code motion (LICM) pass.
-/// TODO: The pass is missing zero tripcount tests.
 /// TODO: When compared to the other standard LICM pass, this pass
 /// has some special handling for affine read/write ops but such handling
 /// requires aliasing to be sound, and as such this pass is unsound. In
@@ -174,6 +174,13 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
   SmallVector<Operation *, 8> opsToMove;
   SmallPtrSet<Operation *, 8> opsWithUsers;
 
+  // If the trip count is statically known to be zero, do not hoist ops with
+  // side effects: the loop body never executes, so hoisting would cause them
+  // to run unconditionally, changing program semantics. Pure (side-effect-free
+  // and speculatable) ops are still eligible for hoisting.
+  std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
+  bool zeroTripCount = tripCount.has_value() && *tripCount == 0;
+
   for (Operation &op : *forOp.getBody()) {
     // Register op in the set of ops that have users. This set is used
     // to prevent hoisting ops that depend on these ops that are
@@ -181,6 +188,8 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
     if (!op.use_empty())
       opsWithUsers.insert(&op);
     if (!isa<AffineYieldOp>(op)) {
+      if (zeroTripCount && !isPure(&op))
+        continue;
       if (isOpLoopInvariant(op, forOp, opsWithUsers, opsToHoist)) {
         opsToMove.push_back(&op);
       }
diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
index 858b7d3ddf9f1..925c8b3db7d14 100644
--- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
+++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
@@ -914,3 +914,88 @@ func.func @side_effecting_ops() {
   // CHECK:          memref.dealloc
   return
 }
+
+// -----
+
+// Side-effectful ops must not be hoisted from a zero-trip-count loop.
+// The store must remain inside the loop; the load after the loop should see
+// the initial value, not the value the store would have written.
+
+// CHECK-LABEL: func @zero_trip_count_no_hoist_store
+func.func @zero_trip_count_no_hoist_store() -> i32 {
+  %idx0 = index.constant 0
+  %c_inner = arith.constant 42 : i32
+  %c_init = arith.constant 0 : i32
+  %alloc = memref.alloc() : memref<1xi32>
+  memref.store %c_init, %alloc[%idx0] : memref<1xi32>
+  // CHECK-NOT: affine.store
+  // CHECK:     affine.for
+  affine.for %i = 0 to 0 {
+    // CHECK-NEXT:   affine.store
+    affine.store %c_inner, %alloc[%idx0] : memref<1xi32>
+  }
+  %result = memref.load %alloc[%idx0] : memref<1xi32>
+  return %result : i32
+}
+
+// -----
+
+// Pure (side-effect-free and speculatable) ops ARE eligible for hoisting from
+// a zero-trip-count loop since they cannot change observable program state.
+
+// CHECK-LABEL: func @zero_trip_count_hoist_pure
+func.func @zero_trip_count_hoist_pure() -> i32 {
+  %c1 = arith.constant 1 : i32
+  %c2 = arith.constant 2 : i32
+  // CHECK: arith.addi
+  // CHECK: affine.for
+  affine.for %i = 0 to 0 {
+    %sum = arith.addi %c1, %c2 : i32
+  }
+  return %c1 : i32
+}
+
+// -----
+
+// Inverted bounds (lb > ub) also produce a zero trip count; stores must not
+// be hoisted in that case either.
+
+// CHECK-LABEL: func @inverted_bounds_no_hoist_store
+func.func @inverted_bounds_no_hoist_store() -> i32 {
+  %idx0 = index.constant 0
+  %c_inner = arith.constant 42 : i32
+  %c_init = arith.constant 0 : i32
+  %alloc = memref.alloc() : memref<1xi32>
+  memref.store %c_init, %alloc[%idx0] : memref<1xi32>
+  // CHECK-NOT: affine.store
+  // CHECK:     affine.for
+  affine.for %i = 5 to 0 {
+    // CHECK-NEXT:   affine.store
+    affine.store %c_inner, %alloc[%idx0] : memref<1xi32>
+  }
+  %result = memref.load %alloc[%idx0] : memref<1xi32>
+  return %result : i32
+}
+
+// -----
+
+// When the trip count is not statically known (symbolic upper bound), the pass
+// cannot determine whether the loop executes at all, so it conservatively
+// proceeds with normal hoisting. This is a known pre-existing soundness
+// limitation: if the loop happens to execute zero times at runtime, a hoisted
+// store will incorrectly execute. Document current behavior here so any future
+// fix to this soundness gap is visible.
+
+// CHECK-LABEL: func @unknown_trip_count_store_hoisted
+func.func @unknown_trip_count_store_hoisted(%n : index) {
+  %idx0 = index.constant 0
+  %c42 = arith.constant 42 : i32
+  %alloc = memref.alloc() : memref<1xi32>
+  // CHECK: affine.store
+  // CHECK: affine.for
+  affine.for %i = 0 to %n {
+    // Loop-invariant store is hoisted even though %n may be zero at runtime.
+    affine.store %c42, %alloc[%idx0] : memref<1xi32>
+  }
+  return
+}