[Mlir-commits] [mlir] multiply lower bound in loop range folding (PR #111875)

Thu Oct 10 10:10:03 PDT 2024

https://github.com/superlopuh created https://github.com/llvm/llvm-project/pull/111875

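Fixes #83482. When an `arith.muli` user of the induction variable is folded into the loop range, the lower bound is now multiplied as well, alongside the upper bound and the step.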

>From 9a9169c7c00fd2378b5c8d83d1b3ddbc55d2dff5 Mon Sep 17 00:00:00 2001
From: Sasha Lopoukhine <superlopuh at gmail.com>
Date: Thu, 10 Oct 2024 16:51:34 +0100
Subject: [PATCH] multiply lower bound in loop range folding

---
 .../SCF/Transforms/LoopRangeFolding.cpp       |  2 +
 mlir/test/Dialect/SCF/loop-range.mlir         | 80 ++++++++++++++-----
 2 files changed, 62 insertions(+), 20 deletions(-)

diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopRangeFolding.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopRangeFolding.cpp
index 9ca441f6431835..0c2a71eb25fe5d 100644
--- a/mlir/lib/Dialect/SCF/Transforms/LoopRangeFolding.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopRangeFolding.cpp
@@ -72,9 +72,11 @@ void ForLoopRangeFolding::runOnOperation() {
         op.setUpperBound(ubFold->getResult(0));
 
       } else if (isa<arith::MulIOp>(user)) {
+        Operation *lbFold = b.clone(*user, lbMap);
         Operation *ubFold = b.clone(*user, ubMap);
         Operation *stepFold = b.clone(*user, stepMap);
 
+        op.setLowerBound(lbFold->getResult(0));
         op.setUpperBound(ubFold->getResult(0));
         op.setStep(stepFold->getResult(0));
       }
diff --git a/mlir/test/Dialect/SCF/loop-range.mlir b/mlir/test/Dialect/SCF/loop-range.mlir
index cd3b4861fc18d1..b0a421256af39a 100644
--- a/mlir/test/Dialect/SCF/loop-range.mlir
+++ b/mlir/test/Dialect/SCF/loop-range.mlir
@@ -21,12 +21,13 @@ func.func @fold_one_loop(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
 // CHECK:       %[[C4:.*]] = arith.constant 4 : index
 // CHECK:       %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
 // CHECK:       %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
-// CHECK:       %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
-// CHECK:       %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
-// CHECK:       scf.for %[[I:.*]] = %[[I0]] to %[[I2]] step %[[I3]] {
-// CHECK:         %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
-// CHECK:         %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
-// CHECK:         memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
+// CHECK:       %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
+// CHECK:       %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
+// CHECK:       %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
+// CHECK:       scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
+// CHECK:         %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
+// CHECK:         %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
+// CHECK:         memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
 
 func.func @fold_one_loop2(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
   %c0 = arith.constant 0 : index
@@ -54,12 +55,48 @@ func.func @fold_one_loop2(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
 // CHECK:       scf.for %[[J:.*]] = %[[C0]] to %[[C10]] step %[[C1]] {
 // CHECK:         %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
 // CHECK:         %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
-// CHECK:         %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
-// CHECK:         %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
-// CHECK:         scf.for %[[I:.*]] = %[[I0]] to %[[I2]] step %[[I3]] {
-// CHECK:           %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
-// CHECK:           %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
-// CHECK:           memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
+// CHECK:         %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
+// CHECK:         %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
+// CHECK:         %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
+// CHECK:         scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
+// CHECK:           %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
+// CHECK:           %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
+// CHECK:           memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
+
+func.func @fold_one_loop3(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c10 = arith.constant 10 : index
+  scf.for %j = %c0 to %c10 step %c1 {
+    scf.for %i = %c1 to %arg1 step %c1 {
+      %0 = arith.addi %arg2, %i : index
+      %1 = arith.muli %0, %c4 : index
+      %2 = memref.load %arg0[%1] : memref<?xi32>
+      %3 = arith.muli %2, %2 : i32
+      memref.store %3, %arg0[%1] : memref<?xi32>
+    }
+  }
+  return
+}
+
+// CHECK-LABEL: func @fold_one_loop3
+// CHECK-SAME:   (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}
+// CHECK:       %[[C0:.*]] = arith.constant 0 : index
+// CHECK:       %[[C1:.*]] = arith.constant 1 : index
+// CHECK:       %[[C4:.*]] = arith.constant 4 : index
+// CHECK:       %[[C10:.*]] = arith.constant 10 : index
+// CHECK:       scf.for %[[J:.*]] = %[[C0]] to %[[C10]] step %[[C1]] {
+// CHECK:         %[[I0:.*]] = arith.addi %[[ARG2]], %[[C1]] : index
+// CHECK:         %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
+// CHECK:         %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
+// CHECK:         %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
+// CHECK:         %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
+// CHECK:         scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
+// CHECK:           %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
+// CHECK:           %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
+// CHECK:           memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
+
 
 func.func @fold_two_loops(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
   %c0 = arith.constant 0 : index
@@ -86,14 +123,17 @@ func.func @fold_two_loops(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
 // CHECK:       %[[C10:.*]] = arith.constant 10 : index
 // CHECK:       %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
 // CHECK:       %[[I1:.*]] = arith.addi %[[ARG2]], %[[C10]] : index
-// CHECK:       scf.for %[[J:.*]] = %[[I0]] to %[[I1]] step %[[C1]] {
-// CHECK:         %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
-// CHECK:         %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
-// CHECK:         %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
-// CHECK:         scf.for %[[I:.*]] = %[[J]] to %[[I2]] step %[[I3]] {
-// CHECK:           %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
-// CHECK:           %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
-// CHECK:           memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
+// CHECK:       %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
+// CHECK:       %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
+// CHECK:       %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
+// CHECK:       scf.for %[[J:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
+// CHECK:         %[[I5:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
+// CHECK:         %[[I6:.*]] = arith.muli %[[I5]], %[[C4]] : index
+// CHECK:         %[[I7:.*]] = arith.muli %[[C1]], %[[C4]] : index
+// CHECK:         scf.for %[[I:.*]] = %[[J]] to %[[I6]] step %[[I7]] {
+// CHECK:           %[[I8:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
+// CHECK:           %[[I9:.*]] = arith.muli %[[I8]], %[[I8]] : i32
+// CHECK:           memref.store %[[I9]], %[[ARG0]]{{\[}}%[[I]]
 
 // If an instruction's operands are not defined outside the loop, we cannot
 // perform the optimization, as is the case with the arith.muli below. (If