[Mlir-commits] [mlir] 9194071 - [mlir] Support hoisting whole affine for loops in LICM

Tue Apr 20 18:16:15 PDT 2021

Author: Amy Zhuang
Date: 2021-04-20T18:07:06-07:00
New Revision: 9194071626a64b685c8800977fc3a77476cf6136

URL: https://github.com/llvm/llvm-project/commit/9194071626a64b685c8800977fc3a77476cf6136
DIFF: https://github.com/llvm/llvm-project/commit/9194071626a64b685c8800977fc3a77476cf6136.diff

LOG: [mlir] Support hoisting whole affine for loops in LICM

Reviewed By: bondhugula

Differential Revision: https://reviews.llvm.org/D100512

Added: 
    

Modified: 
    mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
    mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
index 45510703ff7fe..a16eac6f16a28 100644

--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -71,10 +71,11 @@ bool isOpLoopInvariant(Operation &op, Value indVar,
     if (!checkInvarianceOfNestedIfOps(&op, indVar, opsWithUsers, opsToHoist)) {
       return false;
     }
-  } else if (isa<AffineForOp>(op)) {
-    // If the body of a predicated region has a for loop, we don't hoist the
-    // 'affine.if'.
-    return false;
+  } else if (auto forOp = dyn_cast<AffineForOp>(op)) {
+    if (!areAllOpsInTheBlockListInvariant(forOp.getLoopBody(), indVar,
+                                          opsWithUsers, opsToHoist)) {
+      return false;
+    }
   } else if (isa<AffineDmaStartOp, AffineDmaWaitOp>(op)) {
     // TODO: Support DMA ops.
     return false;
@@ -113,29 +114,29 @@ bool isOpLoopInvariant(Operation &op, Value indVar,
       LLVM_DEBUG(llvm::dbgs() << "\nNon-constant op with 0 operands\n");
       return false;
     }
-    for (unsigned int i = 0; i < op.getNumOperands(); ++i) {
-      auto *operandSrc = op.getOperand(i).getDefiningOp();
+  }
 
-      LLVM_DEBUG(
-          op.getOperand(i).print(llvm::dbgs() << "\nIterating on operand\n"));
+  // Check operands.
+  for (unsigned int i = 0; i < op.getNumOperands(); ++i) {
+    auto *operandSrc = op.getOperand(i).getDefiningOp();
 
-      // If the loop IV is the operand, this op isn't loop invariant.
-      if (indVar == op.getOperand(i)) {
-        LLVM_DEBUG(llvm::dbgs() << "\nLoop IV is the operand\n");
-        return false;
-      }
+    LLVM_DEBUG(
+        op.getOperand(i).print(llvm::dbgs() << "\nIterating on operand\n"));
 
-      if (operandSrc != nullptr) {
-        LLVM_DEBUG(llvm::dbgs()
-                   << *operandSrc << "\nIterating on operand src\n");
+    // If the loop IV is the operand, this op isn't loop invariant.
+    if (indVar == op.getOperand(i)) {
+      LLVM_DEBUG(llvm::dbgs() << "\nLoop IV is the operand\n");
+      return false;
+    }
 
-        // If the value was defined in the loop (outside of the
-        // if/else region), and that operation itself wasn't meant to
-        // be hoisted, then mark this operation loop dependent.
-        if (opsWithUsers.count(operandSrc) &&
-            opsToHoist.count(operandSrc) == 0) {
-          return false;
-        }
+    if (operandSrc != nullptr) {
+      LLVM_DEBUG(llvm::dbgs() << *operandSrc << "\nIterating on operand src\n");
+
+      // If the value was defined in the loop (outside of the
+      // if/else region), and that operation itself wasn't meant to
+      // be hoisted, then mark this operation loop dependent.
+      if (opsWithUsers.count(operandSrc) && opsToHoist.count(operandSrc) == 0) {
+        return false;
       }
     }
   }
@@ -198,12 +199,9 @@ void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
     // not being hoisted.
     if (!op.use_empty())
       opsWithUsers.insert(&op);
-    // We don't hoist for loops.
-    if (!isa<AffineForOp>(op)) {
-      if (!isa<AffineYieldOp>(op)) {
-        if (isOpLoopInvariant(op, indVar, opsWithUsers, opsToHoist)) {
-          opsToMove.push_back(&op);
-        }
+    if (!isa<AffineYieldOp>(op)) {
+      if (isOpLoopInvariant(op, indVar, opsWithUsers, opsToHoist)) {
+        opsToMove.push_back(&op);
       }
     }
   }

diff  --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
index b7bf7ac2e7c25..2203bffe3ddb8 100644
--- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
+++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
@@ -17,6 +17,8 @@ func @nested_loops_both_having_invariant_code() {
   // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
   // CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
   // CHECK-NEXT: affine.for %arg0 = 0 to 10 {
+  // CHECK-NEXT: }
+  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
   // CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
 
   return
@@ -67,6 +69,33 @@ func @nested_loops_code_invariant_to_both() {
 
 // -----
 
+// CHECK-LABEL: func @nested_loops_inner_loops_invariant_to_outermost_loop
+func @nested_loops_inner_loops_invariant_to_outermost_loop(%m : memref<10xindex>) {
+  affine.for %arg0 = 0 to 20 {
+    affine.for %arg1 = 0 to 30 {
+      %v0 = affine.for %arg2 = 0 to 10 iter_args (%prevAccum = %arg1) -> index {
+        %v1 = affine.load %m[%arg2] : memref<10xindex>
+        %newAccum = addi %prevAccum, %v1 : index
+        affine.yield %newAccum : index
+      }
+    }
+  }
+
+  // CHECK:      affine.for %{{.*}} = 0 to 30 {
+  // CHECK-NEXT:   %{{.*}}  = affine.for %{{.*}}  = 0 to 10 iter_args(%{{.*}} = %{{.*}}) -> (index) {
+  // CHECK-NEXT:     %{{.*}}  = affine.load %{{.*}}[%{{.*}}] : memref<10xindex>
+  // CHECK-NEXT:     %{{.*}}  = addi %{{.*}}, %{{.*}} : index
+  // CHECK-NEXT:     affine.yield %{{.*}} : index
+  // CHECK-NEXT:   }
+  // CHECK-NEXT: }
+  // CHECK-NEXT: affine.for %{{.*}} = 0 to 20 {
+  // CHECK-NEXT: }
+
+  return
+}
+
+// -----
+
 func @single_loop_nothing_invariant() {
   %m1 = memref.alloc() : memref<10xf32>
   %m2 = memref.alloc() : memref<10xf32>
@@ -228,8 +257,9 @@ func @load_after_load() {
   // CHECK-NEXT: %2 = addf %cst, %cst : f32
   // CHECK-NEXT: affine.for %arg0 = 0 to 10 {
   // CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
-  // CHECK-NEXT: affine.for %arg1 = 0 to 10 {
-  // CHECK-NEXT: %4 = affine.load %0[%arg1] : memref<10xf32>
+  // CHECK-NEXT: }
+  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
+  // CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
 
   return
 }
@@ -252,6 +282,8 @@ func @invariant_affine_if() {
   // CHECK: %0 = memref.alloc() : memref<10xf32>
   // CHECK-NEXT: %cst = constant 8.000000e+00 : f32
   // CHECK-NEXT: affine.for %arg0 = 0 to 10 {
+  // CHECK-NEXT: }
+  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
   // CHECK-NEXT: affine.if #set(%arg0, %arg0) {
   // CHECK-NEXT: %1 = addf %cst, %cst : f32
   // CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
@@ -386,6 +418,8 @@ func @invariant_affine_nested_if_else2() {
   // CHECK-NEXT: %1 = memref.alloc() : memref<10xf32>
   // CHECK-NEXT: %cst = constant 8.000000e+00 : f32
   // CHECK-NEXT: affine.for %arg0 = 0 to 10 {
+  // CHECK-NEXT: }
+  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
   // CHECK-NEXT: affine.if #set(%arg0, %arg0) {
   // CHECK-NEXT: %2 = addf %cst, %cst : f32
   // CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
@@ -420,6 +454,8 @@ func @invariant_affine_nested_if2() {
   // CHECK: %0 = memref.alloc() : memref<10xf32>
   // CHECK-NEXT: %cst = constant 8.000000e+00 : f32
   // CHECK-NEXT: affine.for %arg0 = 0 to 10 {
+  // CHECK-NEXT: }
+  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
   // CHECK-NEXT: affine.if #set(%arg0, %arg0) {
   // CHECK-NEXT: %1 = addf %cst, %cst : f32
   // CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32>
@@ -530,6 +566,8 @@ func @nested_load_store_same_memref2() {
   // CHECK-NEXT: %cst = constant 8.000000e+00 : f32
   // CHECK-NEXT: %c0 = constant 0 : index
   // CHECK-NEXT: affine.for %arg0 = 0 to 10 {
+  // CHECK-NEXT: }
+  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
   // CHECK-NEXT:   affine.store %cst, %0[%c0] : memref<10xf32>
   // CHECK-NEXT:   %1 = affine.load %0[%arg0] : memref<10xf32>