[llvm-branch-commits] [mlir] 64e8f9d - [MLIR] Add affine-cs-pipeline.

Fri Nov 5 03:30:29 PDT 2021

Author: Prateek Gupta
Date: 2021-09-24T20:50:55+05:30
New Revision: 64e8f9dd5cacb266ae38f788feb1c1acfb3475d3

URL: https://github.com/llvm/llvm-project/commit/64e8f9dd5cacb266ae38f788feb1c1acfb3475d3
DIFF: https://github.com/llvm/llvm-project/commit/64e8f9dd5cacb266ae38f788feb1c1acfb3475d3.diff

LOG: [MLIR] Add affine-cs-pipeline.

This commit introduces affine-cs-pipeline. The pipeline performs maximal
affine loop fusion followed by memref data flow optimization, running
canonicalization before, between, and after these two passes. A relevant
test case is also added.

Signed-off-by: Prateek Gupta <prateek at polymagelabs.com>

Changes while porting to upstream:
1. MemRefDataFlowOptPass is now called AffineScalarReplacementPass.
2. The AffineCSPipeline is now registered in one place for `mlir-opt`.
3. Unused store elimination has since been added upstream; therefore, the
   test case that involved an unused store has been removed.

Added: 
    mlir/lib/Transforms/AffineCSPipeline.cpp
    mlir/test/Transforms/affine-cs-pipeline.mlir

Modified: 
    mlir/include/mlir/InitAllPasses.h
    mlir/lib/Transforms/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h
index 59b35b7096f5a..47ef5f0e7d771 100644

--- a/mlir/include/mlir/InitAllPasses.h
+++ b/mlir/include/mlir/InitAllPasses.h
@@ -34,6 +34,8 @@
 #include <cstdlib>
 
 namespace mlir {
+// Cerebras specific pipeline.
+void registerAffineCSPipeline();
 
 // This function may be called to register the MLIR passes with the
 // global registry.
@@ -66,6 +68,9 @@ inline void registerAllPasses() {
   registerStandardPasses();
   tensor::registerTensorPasses();
   tosa::registerTosaOptPasses();
+
+  // Cerebras specific pipeline.
+  registerAffineCSPipeline();
 }
 
 } // namespace mlir

diff  --git a/mlir/lib/Transforms/AffineCSPipeline.cpp b/mlir/lib/Transforms/AffineCSPipeline.cpp
new file mode 100644
index 0000000000000..c6994188b0fd0
--- /dev/null
+++ b/mlir/lib/Transforms/AffineCSPipeline.cpp
@@ -0,0 +1,36 @@
+//===---------------------AffineCSPipeline.cpp-----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pipeline for performing affine loop fusion and other
+// complimentary optimizations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/Passes.h"
+
+namespace mlir {
+void createAffineCSPipeline(OpPassManager &pm) {
+  pm.addPass(mlir::createCanonicalizerPass());
+  pm.addPass(mlir::createLoopFusionPass(/*fastMemorySpace=*/0,
+                                        /*localBufSizeThreshold=*/0,
+                                        /*maximalFusion=*/true));
+  pm.addPass(mlir::createCanonicalizerPass());
+  pm.addPass(mlir::createAffineScalarReplacementPass());
+  pm.addPass(mlir::createCanonicalizerPass());
+}
+void registerAffineCSPipeline() {
+  mlir::PassPipelineRegistration<>(
+      "affine-cs-pipeline",
+      "runs all passes for performing affine loop fusion and other "
+      "complimentary optimizations.",
+      createAffineCSPipeline);
+}
+} // end namespace mlir

diff  --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 54f3693c89c69..72065445ece92 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_subdirectory(Utils)
 
 add_mlir_library(MLIRTransforms
+  AffineCSPipeline.cpp
   BufferDeallocation.cpp
   BufferOptimizations.cpp
   BufferResultsToOutParams.cpp

diff  --git a/mlir/test/Transforms/affine-cs-pipeline.mlir b/mlir/test/Transforms/affine-cs-pipeline.mlir
new file mode 100644
index 0000000000000..70ed6a30473a3
--- /dev/null
+++ b/mlir/test/Transforms/affine-cs-pipeline.mlir
@@ -0,0 +1,71 @@
+// RUN: mlir-opt %s -affine-cs-pipeline | FileCheck %s
+
+// This test case checks for the maximal loop fusion.
+func @simple_matmul_one(%arg0: memref<16x16xf32>, %arg1: memref<16x16xf32>, %arg2: memref<16x16xf32>) {
+    %cst = constant 0.000000e+00 : f32
+    affine.for %arg3 = 0 to 16 {
+        affine.for %arg4 = 0 to 16 {
+            affine.store %cst, %arg2[%arg3, %arg4] : memref<16x16xf32>
+        }
+    }
+    affine.for %arg3 = 0 to 16 {
+        affine.for %arg4 = 0 to 16 {
+            affine.for %arg5 = 0 to 16 {
+                %0 = affine.load %arg0[%arg3, %arg5] : memref<16x16xf32>
+                %1 = affine.load %arg1[%arg5, %arg4] : memref<16x16xf32>
+                %2 = affine.load %arg2[%arg3, %arg4] : memref<16x16xf32>
+                %3 = mulf %0, %1 : f32
+                %4 = addf %3, %2 : f32
+                affine.store %4, %arg2[%arg3, %arg4] : memref<16x16xf32>
+            }
+        }
+    }
+    return
+}
+
+// CHECK-LABEL: func @simple_matmul_one
+// CHECK:       (%[[LHS:.*]]: memref<16x16xf32>, %[[RHS:.*]]: memref<16x16xf32>, %[[OUT:.*]]: memref<16x16xf32>) {
+// CHECK:           %[[INIT:.*]] = constant 0.000000e+00 : f32
+// CHECK-NEXT:      affine.for %[[i:.*]] = 0 to 16 {
+// CHECK-NEXT:          affine.for %[[j:.*]] = 0 to 16 {
+// CHECK-NEXT:              affine.store %[[INIT]], %[[OUT]][%[[i]], %[[j]]]
+// CHECK-NEXT:              affine.for %[[k:.*]] = 0 to 16 {
+// CHECK-NEXT:                  %[[LHS_VAL:.*]] = affine.load %[[LHS]][%[[i]], %[[k]]]
+// CHECK-NEXT:                  %[[RHS_VAL:.*]] = affine.load %[[RHS]][%[[k]], %[[j]]]
+// CHECK-NEXT:                  %[[OUT_VAL:.*]] = affine.load %[[OUT]][%[[i]], %[[j]]]
+// CHECK-NEXT:                  %[[PROD:.*]] = mulf %[[LHS_VAL]], %[[RHS_VAL]]
+// CHECK-NEXT:                  %[[RES:.*]] = addf %[[PROD]], %[[OUT_VAL]]
+// CHECK-NEXT:                  affine.store %[[RES]], %[[OUT]][%[[i]], %[[j]]]
+
+
+// This test case checks the memref dataflow optimization.
+func @simple_matmul_two(%arg0: memref<16x16xf32>, %arg1: memref<16x16xf32>, %arg2: memref<16x16xf32>) {
+    %cst = constant 0.000000e+00 : f32
+    affine.for %arg3 = 0 to 16 {
+        affine.for %arg4 = 0 to 16 {
+            affine.for %arg5 = 0 to 16 {
+                %0 = affine.load %arg0[%arg3, %arg5] : memref<16x16xf32>
+                %1 = affine.load %arg1[%arg5, %arg4] : memref<16x16xf32>
+                affine.store %cst, %arg2[%arg3, %arg4] : memref<16x16xf32>
+                %2 = affine.load %arg2[%arg3, %arg4] : memref<16x16xf32>
+                %3 = mulf %0, %1 : f32
+                %4 = addf %3, %2 : f32
+                affine.store %4, %arg2[%arg3, %arg4] : memref<16x16xf32>
+            }
+        }
+    }
+    return
+}
+
+// CHECK-LABEL: func @simple_matmul_two
+// CHECK:       (%[[LHS:.*]]: memref<16x16xf32>, %[[RHS:.*]]: memref<16x16xf32>, %[[OUT:.*]]: memref<16x16xf32>) {
+// CHECK:           %[[INIT:.*]] = constant 0.000000e+00 : f32
+// CHECK-NEXT:      affine.for %[[i:.*]] = 0 to 16 {
+// CHECK-NEXT:          affine.for %[[j:.*]] = 0 to 16 {
+// CHECK-NEXT:              affine.for %[[k:.*]] = 0 to 16 {
+// CHECK-NEXT:                  %[[LHS_VAL:.*]] = affine.load %[[LHS]][%[[i]], %[[k]]]
+// CHECK-NEXT:                  %[[RHS_VAL:.*]] = affine.load %[[RHS]][%[[k]], %[[j]]]
+// CHECK-NEXT:                  %[[PROD:.*]] = mulf %[[LHS_VAL]], %[[RHS_VAL]]
+// CHECK-NEXT:                  %[[RES:.*]] = addf %[[PROD]], %[[INIT]]
+// CHECK-NEXT:                  affine.store %[[RES]], %[[OUT]][%[[i]], %[[j]]]
+