[PATCH] D79547: [MLIR] Fix affine fusion bug/efficiency issue / enable more fusion

Wed May 6 22:42:29 PDT 2020

This revision was automatically updated to reflect the committed changes.
bondhugula marked an inline comment as done.
Closed by commit rG2affcd664e6a: [MLIR] Fix affine fusion bug/efficiency issue / enable more fusion (authored by bondhugula).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79547/new/

https://reviews.llvm.org/D79547

Files:
  mlir/lib/Transforms/LoopFusion.cpp
  mlir/test/Transforms/loop-fusion.mlir


Index: mlir/test/Transforms/loop-fusion.mlir
===================================================================

--- mlir/test/Transforms/loop-fusion.mlir
+++ mlir/test/Transforms/loop-fusion.mlir
@@ -2422,5 +2422,45 @@
   // CHECK-NEXT: affine.store %{{.*}}, %[[A]]
   // CHECK-NEXT: affine.load %[[B]]
   // CHECK-NOT: affine.for %{{.*}}
+  // CHECK: return
   return
 }
+
+// -----
+
+// MAXIMAL-LABEL: func @reshape_into_matmul
+func @reshape_into_matmul(%lhs : memref<1024x1024xf32>,
+              %R: memref<16x64x1024xf32>, %out: memref<1024x1024xf32>) {
+  %rhs = alloc() :  memref<1024x1024xf32>
+
+  // Reshape from 3-d to 2-d.
+  affine.for %i0 = 0 to 16 {
+    affine.for %i1 = 0 to 64 {
+      affine.for %k = 0 to 1024 {
+        %v = affine.load %R[%i0, %i1, %k] : memref<16x64x1024xf32>
+        affine.store %v, %rhs[64*%i0 + %i1, %k] : memref<1024x1024xf32>
+      }
+    }
+  }
+
+  // Matmul.
+  affine.for %i = 0 to 1024 {
+    affine.for %j = 0 to 1024 {
+      affine.for %k = 0 to 1024 {
+        %0 = affine.load %rhs[%k, %j] : memref<1024x1024xf32>
+        %1 = affine.load %lhs[%i, %k] : memref<1024x1024xf32>
+        %2 = mulf %1, %0 : f32
+        %3 = affine.load %out[%i, %j] : memref<1024x1024xf32>
+        %4 = addf %3, %2 : f32
+        affine.store %4, %out[%i, %j] : memref<1024x1024xf32>
+      }
+    }
+  }
+  return
+}
+// MAXIMAL-NEXT: alloc
+// MAXIMAL-NEXT: affine.for
+// MAXIMAL-NEXT:   affine.for
+// MAXIMAL-NEXT:     affine.for
+// MAXIMAL-NOT:      affine.for
+// MAXIMAL:      return
Index: mlir/lib/Transforms/LoopFusion.cpp
===================================================================
--- mlir/lib/Transforms/LoopFusion.cpp
+++ mlir/lib/Transforms/LoopFusion.cpp
@@ -1625,7 +1625,10 @@
             // continue fusing based on new operands.
             for (auto *loadOpInst : dstLoopCollector.loadOpInsts) {
               auto loadMemRef = cast<AffineLoadOp>(loadOpInst).getMemRef();
-              if (visitedMemrefs.count(loadMemRef) == 0)
+              // NOTE: 'loads' is kept as a vector since it is expected to be
+              // small; switch to a hash set if this lookup becomes costly.
+              if (visitedMemrefs.count(loadMemRef) == 0 &&
+                  !llvm::is_contained(loads, loadOpInst))
                 loads.push_back(loadOpInst);
             }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D79547.262553.patch
Type: text/x-patch
Size: 2348 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200507/c1e9bef8/attachment.bin>