[Mlir-commits] [mlir] 05c6c64 - [MLIR] [affine-loop-fusion] Fix a bug about non-result ops in affine-loop-fusion

Sat Feb 6 00:06:26 PST 2021

Author: Tung D. Le
Date: 2021-02-06T13:30:16+05:30
New Revision: 05c6c648ece36e222c0601206690d3a1358483f8

URL: https://github.com/llvm/llvm-project/commit/05c6c648ece36e222c0601206690d3a1358483f8
DIFF: https://github.com/llvm/llvm-project/commit/05c6c648ece36e222c0601206690d3a1358483f8.diff

LOG: [MLIR] [affine-loop-fusion] Fix a bug about non-result ops in affine-loop-fusion

This patch fixes the following bug that occurs when running --affine-loop-fusion.

Input program:
 ```mlir
func @should_not_fuse_since_top_level_non_affine_non_result_users(
    %in0 : memref<32xf32>, %in1 : memref<32xf32>) {
  %c0 = constant 0 : index
  %cst_0 = constant 0.000000e+00 : f32

  affine.for %d = 0 to 32 {
    %lhs = affine.load %in0[%d] : memref<32xf32>
    %rhs = affine.load %in1[%d] : memref<32xf32>
    %add = addf %lhs, %rhs : f32
    affine.store %add, %in0[%d] : memref<32xf32>
  }
  store %cst_0, %in0[%c0] : memref<32xf32>
  affine.for %d = 0 to 32 {
    %lhs = affine.load %in0[%d] : memref<32xf32>
    %rhs = affine.load %in1[%d] : memref<32xf32>
    %add = addf %lhs, %rhs: f32
    affine.store %add, %in0[%d] : memref<32xf32>
  }
  return
}
```

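Running --affine-loop-fusion on this input produced the following incorrect
output, in which the two loops are fused and the intervening store ends up
before the fused loop: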

```mlir
func @should_not_fuse_since_top_level_non_affine_non_result_users(%arg0: memref<32xf32>, %arg1: memref<32xf32>) {
  %c0 = constant 0 : index
  %cst = constant 0.000000e+00 : f32
  store %cst, %arg0[%c0] : memref<32xf32>
  affine.for %arg2 = 0 to 32 {
    %0 = affine.load %arg0[%arg2] : memref<32xf32>
    %1 = affine.load %arg1[%arg2] : memref<32xf32>
    %2 = addf %0, %1 : f32
    affine.store %2, %arg0[%arg2] : memref<32xf32>
    %3 = affine.load %arg0[%arg2] : memref<32xf32>
    %4 = affine.load %arg1[%arg2] : memref<32xf32>
    %5 = addf %3, %4 : f32
    affine.store %5, %arg0[%arg2] : memref<32xf32>
  }
  return
}
```

This happened because, when analyzing the source and destination nodes,
affine loop fusion ignored non-result ops sandwiched between them. In
other words, the MemRefDependenceGraph built by affine loop fusion did not
contain these non-result ops.

This patch solves the issue by adding top-level non-result ops that have a
memory write side effect to the MemRefDependenceGraph.

Reviewed By: bondhugula

Differential Revision: https://reviews.llvm.org/D95668

Added: 
    

Modified: 
    mlir/lib/Transforms/LoopFusion.cpp
    mlir/test/Transforms/loop-fusion.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp
index 96889f15c502..c3ffb17b1be7 100644

--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Transforms/LoopFusion.cpp
@@ -768,6 +768,17 @@ bool MemRefDependenceGraph::init(FuncOp f) {
       // could be used by loop nest nodes.
       Node node(nextNodeId++, &op);
       nodes.insert({node.id, node});
+    } else if (auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op)) {
+      // Create graph node for top-level op, which could have a memory write
+      // side effect.
+      SmallVector<MemoryEffects::EffectInstance, 1> effects;
+      effectInterface.getEffects(effects);
+      if (llvm::any_of(effects, [](const MemoryEffects::EffectInstance &it) {
+            return isa<MemoryEffects::Write>(it.getEffect());
+          })) {
+        Node node(nextNodeId++, &op);
+        nodes.insert({node.id, node});
+      }
     }
   }
 

diff  --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir
index c1bccea4c9f5..0c20ea46ad5e 100644
--- a/mlir/test/Transforms/loop-fusion.mlir
+++ b/mlir/test/Transforms/loop-fusion.mlir
@@ -2674,6 +2674,35 @@ func @should_not_fuse_since_top_level_non_affine_users(%in0 : memref<32xf32>,
 
 // -----
 
+// CHECK-LABEL: func @should_not_fuse_since_top_level_non_affine_mem_write_users
+func @should_not_fuse_since_top_level_non_affine_mem_write_users(
+    %in0 : memref<32xf32>, %in1 : memref<32xf32>) {
+  %c0 = constant 0 : index
+  %cst_0 = constant 0.000000e+00 : f32
+
+  affine.for %d = 0 to 32 {
+    %lhs = affine.load %in0[%d] : memref<32xf32>
+    %rhs = affine.load %in1[%d] : memref<32xf32>
+    %add = addf %lhs, %rhs : f32
+    affine.store %add, %in0[%d] : memref<32xf32>
+  }
+  store %cst_0, %in0[%c0] : memref<32xf32>
+  affine.for %d = 0 to 32 {
+    %lhs = affine.load %in0[%d] : memref<32xf32>
+    %rhs = affine.load %in1[%d] : memref<32xf32>
+    %add = addf %lhs, %rhs: f32
+    affine.store %add, %in0[%d] : memref<32xf32>
+  }
+  return
+}
+
+// CHECK:  affine.for
+// CHECK:    addf
+// CHECK:  affine.for
+// CHECK:    addf
+
+// -----
+
 // MAXIMAL-LABEL: func @fuse_minor_affine_map
 func @fuse_minor_affine_map(%in: memref<128xf32>, %out: memref<20x512xf32>) {
   %tmp = alloc() : memref<128xf32>