[Mlir-commits] [mlir] 73bcfb6 - [mlir][Affine] Fix LICM incorrectly hoisting stores from zero-trip-count loops (#189165)

Fri Apr 3 04:07:31 PDT 2026

Author: Mehdi Amini
Date: 2026-04-03T13:07:26+02:00
New Revision: 73bcfb6824a9bfdeee215590c541cce745e556ef

URL: https://github.com/llvm/llvm-project/commit/73bcfb6824a9bfdeee215590c541cce745e556ef
DIFF: https://github.com/llvm/llvm-project/commit/73bcfb6824a9bfdeee215590c541cce745e556ef.diff

LOG: [mlir][Affine] Fix LICM incorrectly hoisting stores from zero-trip-count loops (#189165)

The affine-loop-invariant-code-motion pass was hoisting side-effectful
operations (e.g. affine.store) out of loops whose trip count is
statically known to be zero. This caused stores to execute
unconditionally even though the loop body should never run, producing
incorrect results.

The fix skips hoisting of non-memory-effect-free ops when
getConstantTripCount returns 0. Pure/side-effect-free ops are still
eligible for hoisting because they cannot change observable program
state.

Fixes #128273

Assisted-by: Claude Code

Added: 
    

Modified: 
    mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
    mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
index c8d8b8d62e59d..3c55830df61c3 100644

--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -12,6 +12,7 @@
 
 #include "mlir/Dialect/Affine/Transforms/Passes.h"
 
+#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
@@ -31,7 +32,6 @@ using namespace mlir::affine;
 namespace {
 
 /// Affine loop invariant code motion (LICM) pass.
-/// TODO: The pass is missing zero tripcount tests.
 /// TODO: When compared to the other standard LICM pass, this pass
 /// has some special handling for affine read/write ops but such handling
 /// requires aliasing to be sound, and as such this pass is unsound. In
@@ -174,6 +174,13 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
   SmallVector<Operation *, 8> opsToMove;
   SmallPtrSet<Operation *, 8> opsWithUsers;
 
+  // Only hoist side-effectful ops when the loop is statically known to execute
+  // at least once. For unknown (dynamic) or zero trip counts we cannot prove
+  // the body executes, so hoisting a side-effectful op would change observable
+  // program semantics. Pure (side-effect-free) ops may always be hoisted.
+  auto tripCount = getConstantTripCount(forOp);
+  bool guaranteedToExecute = tripCount.has_value() && *tripCount > 0;
+
   for (Operation &op : *forOp.getBody()) {
     // Register op in the set of ops that have users. This set is used
     // to prevent hoisting ops that depend on these ops that are
@@ -181,6 +188,10 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
     if (!op.use_empty())
       opsWithUsers.insert(&op);
     if (!isa<AffineYieldOp>(op)) {
+      // Do not hoist ops with side effects unless the loop is guaranteed to
+      // execute at least once.
+      if (!guaranteedToExecute && !isPure(&op))
+        continue;
       if (isOpLoopInvariant(op, forOp, opsWithUsers, opsToHoist)) {
         opsToMove.push_back(&op);
       }

diff  --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
index 858b7d3ddf9f1..83313f303dbd1 100644
--- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
+++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
@@ -766,11 +766,13 @@ func.func @affine_parallel(%memref_8: memref<4090x2040xf32>, %x: index) {
       }
     }
   }
+  // The store is inside a loop with an unknown (non-constant) trip count, so
+  // it must not be hoisted: the loop might not execute at runtime.
   // CHECK:       affine.parallel
   // CHECK-NEXT:    affine.for
   // CHECK-NEXT:      affine.parallel
-  // CHECK-NEXT:        affine.store
   // CHECK-NEXT:        affine.for
+  // CHECK-NEXT:          affine.store
 
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -787,8 +789,8 @@ func.func @affine_parallel(%memref_8: memref<4090x2040xf32>, %x: index) {
   // CHECK:       scf.parallel
   // CHECK-NEXT:    affine.for
   // CHECK-NEXT:      affine.parallel
-  // CHECK-NEXT:        affine.store
   // CHECK-NEXT:        affine.for
+  // CHECK-NEXT:          affine.store
 
   affine.for %arg3 = 0 to 32 {
     affine.for %arg4 = 0 to 16 {
@@ -914,3 +916,73 @@ func.func @side_effecting_ops() {
   // CHECK:          memref.dealloc
   return
 }
+
+// -----
+
+// Pure (side-effect-free) ops may still be hoisted from zero-trip-count loops
+// since they have no observable effect. The side-effectful op that uses the
+// hoisted result must remain in the loop.
+
+// CHECK-LABEL: func @hoist_pure_from_zero_trip_count_loop
+func.func @hoist_pure_from_zero_trip_count_loop(%x: i32, %y: i32) -> i32 {
+  // CHECK:      %[[Z:.*]] = arith.addi %arg0, %arg1
+  // CHECK:      affine.for {{.*}} = 0 to 0 {
+  // CHECK-NEXT:   affine.store %[[Z]]
+  // CHECK-NEXT: }
+  %alloc = memref.alloc() : memref<1xi32>
+  affine.for %i = 0 to 0 {
+    // arith.addi is pure and loop-invariant — it should be hoisted even though
+    // the loop never executes.
+    %z = arith.addi %x, %y : i32
+    // The store is not pure so it must stay in the loop.
+    affine.store %z, %alloc[0] : memref<1xi32>
+  }
+  %r = affine.load %alloc[0] : memref<1xi32>
+  return %r : i32
+}
+
+// -----
+
+// Side-effectful ops must not be hoisted from zero-trip-count loops, because
+// those loops never execute and hoisting would change observable behavior.
+
+// CHECK-LABEL: func @no_hoist_from_zero_trip_count_loop
+func.func @no_hoist_from_zero_trip_count_loop(%x: i32) -> i32 {
+  %alloc = memref.alloc() : memref<1xi32>
+  %c42 = arith.constant 42 : i32
+  affine.store %c42, %alloc[0] : memref<1xi32>
+  affine.for %i = 0 to 0 {
+    // This store must not be hoisted: the loop body is never executed.
+    affine.store %x, %alloc[0] : memref<1xi32>
+  }
+  // CHECK:      affine.store %c42{{.*}}, %alloc[0]
+  // CHECK:      affine.for {{.*}} = 0 to 0 {
+  // CHECK-NEXT:   affine.store %arg0, %alloc[0]
+  // CHECK-NEXT: }
+  %r = affine.load %alloc[0] : memref<1xi32>
+  return %r : i32
+}
+
+// -----
+
+// Side-effectful ops must not be hoisted from loops with an unknown (dynamic)
+// trip count, because such a loop may not execute at all (e.g. when the upper
+// bound is zero at runtime). Only pure ops may be hoisted.
+
+// CHECK-LABEL: func @unknown_trip_count_store_not_hoisted
+func.func @unknown_trip_count_store_not_hoisted(%x: i32, %n: index) -> i32 {
+  %alloc = memref.alloc() : memref<1xi32>
+  %c42 = arith.constant 42 : i32
+  affine.store %c42, %alloc[0] : memref<1xi32>
+  affine.for %i = 0 to %n {
+    // This store must not be hoisted: if %n == 0 at runtime the loop body
+    // never executes, and hoisting would change the observable result.
+    affine.store %x, %alloc[0] : memref<1xi32>
+  }
+  // CHECK:      affine.store %c42{{.*}}, %alloc[0]
+  // CHECK:      affine.for {{.*}} = 0 to %{{.*}} {
+  // CHECK-NEXT:   affine.store %arg0, %alloc[0]
+  // CHECK-NEXT: }
+  %r = affine.load %alloc[0] : memref<1xi32>
+  return %r : i32
+}