[Mlir-commits] [mlir] [mlir][Affine] Fix LICM incorrectly hoisting stores from zero-trip-count loops (PR #189165)

Thu Apr 2 08:06:35 PDT 2026

https://github.com/joker-eph updated https://github.com/llvm/llvm-project/pull/189165

>From 36d389e586896ac31a5dfc1c63e393efac2b2cae Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Sat, 28 Mar 2026 19:34:28 -0700
Subject: [PATCH] [MLIR][Affine] Fix LICM incorrectly hoisting stores from
 loops that may not execute

The affine-loop-invariant-code-motion pass was hoisting side-effectful
operations (e.g. affine.store) out of loops whose body may never execute.
Two cases were affected:

1. Loops with a statically known zero trip count (e.g. affine.for %i = 0 to 0):
   the inner store was unconditionally executed even though the loop never runs.

2. Loops with a dynamic/unknown trip count (e.g. affine.for %i = 0 to %n):
   if %n == 0 at runtime the loop body never executes, but the hoisted store
   would still run, producing incorrect results.

For example, given:
  affine.store %c-58822, %alloc[0]
  affine.for %i = 0 to 0 {
    affine.store %c821775651, %alloc[0]
  }
  %r = affine.load %alloc[0]

The pass incorrectly hoisted the inner store before the loop, making %r
return 821775651 instead of -58822.

Fix: only hoist side-effectful ops when getConstantTripCount(forOp) returns
a value strictly greater than zero, i.e., the loop is statically guaranteed
to execute at least once. Pure (side-effect-free) ops may always be hoisted
since they cannot change observable program state.

Fixes #128273

Assisted-by: Claude Code
---
 .../AffineLoopInvariantCodeMotion.cpp         | 13 +++-
 .../affine-loop-invariant-code-motion.mlir    | 76 ++++++++++++++++++-
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
index c8d8b8d62e59d..3c55830df61c3 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Dialect/Affine/Transforms/Passes.h"
 
+#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
@@ -31,7 +32,6 @@ using namespace mlir::affine;
 namespace {
 
 /// Affine loop invariant code motion (LICM) pass.
-/// TODO: The pass is missing zero tripcount tests.
 /// TODO: When compared to the other standard LICM pass, this pass
 /// has some special handling for affine read/write ops but such handling
 /// requires aliasing to be sound, and as such this pass is unsound. In
@@ -174,6 +174,13 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
   SmallVector<Operation *, 8> opsToMove;
   SmallPtrSet<Operation *, 8> opsWithUsers;
 
+  // Only hoist side-effectful ops when the loop is statically known to execute
+  // at least once. For unknown (dynamic) or zero trip counts we cannot prove
+  // the body executes, so hoisting a side-effectful op would change observable
+  // program semantics. Pure (side-effect-free) ops may always be hoisted.
+  auto tripCount = getConstantTripCount(forOp);
+  bool guaranteedToExecute = tripCount.has_value() && *tripCount > 0;
+
   for (Operation &op : *forOp.getBody()) {
     // Register op in the set of ops that have users. This set is used
     // to prevent hoisting ops that depend on these ops that are
@@ -181,6 +188,10 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
     if (!op.use_empty())
       opsWithUsers.insert(&op);
     if (!isa<AffineYieldOp>(op)) {
+      // Do not hoist ops with side effects unless the loop is guaranteed to
+      // execute at least once.
+      if (!guaranteedToExecute && !isPure(&op))
+        continue;
       if (isOpLoopInvariant(op, forOp, opsWithUsers, opsToHoist)) {
         opsToMove.push_back(&op);
       }
diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
index 858b7d3ddf9f1..83313f303dbd1 100644
--- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
+++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
@@ -766,11 +766,13 @@ func.func @affine_parallel(%memref_8: memref<4090x2040xf32>, %x: index) {
       }
     }
   }
+  // The store is inside a loop with an unknown (non-constant) trip count, so
+  // it must not be hoisted: the loop might not execute at runtime.
   // CHECK:       affine.parallel
   // CHECK-NEXT:    affine.for
   // CHECK-NEXT:      affine.parallel
-  // CHECK-NEXT:        affine.store
   // CHECK-NEXT:        affine.for
+  // CHECK-NEXT:          affine.store
 
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -787,8 +789,8 @@ func.func @affine_parallel(%memref_8: memref<4090x2040xf32>, %x: index) {
   // CHECK:       scf.parallel
   // CHECK-NEXT:    affine.for
   // CHECK-NEXT:      affine.parallel
-  // CHECK-NEXT:        affine.store
   // CHECK-NEXT:        affine.for
+  // CHECK-NEXT:          affine.store
 
   affine.for %arg3 = 0 to 32 {
     affine.for %arg4 = 0 to 16 {
@@ -914,3 +916,73 @@ func.func @side_effecting_ops() {
   // CHECK:          memref.dealloc
   return
 }
+
+// -----
+
+// Pure (side-effect-free) ops may still be hoisted from zero-trip-count loops
+// since they have no observable effect. The side-effectful op that uses the
+// hoisted result must remain in the loop.
+
+// CHECK-LABEL: func @hoist_pure_from_zero_trip_count_loop
+func.func @hoist_pure_from_zero_trip_count_loop(%x: i32, %y: i32) -> i32 {
+  // CHECK:      %[[Z:.*]] = arith.addi %arg0, %arg1
+  // CHECK:      affine.for {{.*}} = 0 to 0 {
+  // CHECK-NEXT:   affine.store %[[Z]]
+  // CHECK-NEXT: }
+  %alloc = memref.alloc() : memref<1xi32>
+  affine.for %i = 0 to 0 {
+    // arith.addi is pure and loop-invariant — it should be hoisted even though
+    // the loop never executes.
+    %z = arith.addi %x, %y : i32
+    // The store is not pure so it must stay in the loop.
+    affine.store %z, %alloc[0] : memref<1xi32>
+  }
+  %r = affine.load %alloc[0] : memref<1xi32>
+  return %r : i32
+}
+
+// -----
+
+// Side-effectful ops must not be hoisted from zero-trip-count loops, because
+// those loops never execute and hoisting would change observable behavior.
+
+// CHECK-LABEL: func @no_hoist_from_zero_trip_count_loop
+func.func @no_hoist_from_zero_trip_count_loop(%x: i32) -> i32 {
+  %alloc = memref.alloc() : memref<1xi32>
+  %c42 = arith.constant 42 : i32
+  affine.store %c42, %alloc[0] : memref<1xi32>
+  affine.for %i = 0 to 0 {
+    // This store must not be hoisted: the loop body is never executed.
+    affine.store %x, %alloc[0] : memref<1xi32>
+  }
+  // CHECK:      affine.store %c42{{.*}}, %alloc[0]
+  // CHECK:      affine.for {{.*}} = 0 to 0 {
+  // CHECK-NEXT:   affine.store %arg0, %alloc[0]
+  // CHECK-NEXT: }
+  %r = affine.load %alloc[0] : memref<1xi32>
+  return %r : i32
+}
+
+// -----
+
+// Side-effectful ops must not be hoisted from loops with an unknown (dynamic)
+// trip count, because such a loop may not execute at all (e.g. when the upper
+// bound is zero at runtime). Only pure ops may be hoisted.
+
+// CHECK-LABEL: func @unknown_trip_count_store_not_hoisted
+func.func @unknown_trip_count_store_not_hoisted(%x: i32, %n: index) -> i32 {
+  %alloc = memref.alloc() : memref<1xi32>
+  %c42 = arith.constant 42 : i32
+  affine.store %c42, %alloc[0] : memref<1xi32>
+  affine.for %i = 0 to %n {
+    // This store must not be hoisted: if %n == 0 at runtime the loop body
+    // never executes, and hoisting would change the observable result.
+    affine.store %x, %alloc[0] : memref<1xi32>
+  }
+  // CHECK:      affine.store %c42{{.*}}, %alloc[0]
+  // CHECK:      affine.for {{.*}} = 0 to %{{.*}} {
+  // CHECK-NEXT:   affine.store %arg0, %alloc[0]
+  // CHECK-NEXT: }
+  %r = affine.load %alloc[0] : memref<1xi32>
+  return %r : i32
+}