[Mlir-commits] [mlir] 6507417 - [mlir][sparse] make fusion for SDDMM more robust

Tue Dec 6 14:32:28 PST 2022

Author: Aart Bik
Date: 2022-12-06T14:32:19-08:00
New Revision: 65074179f26d9f2c7a8e74f136ed17adda144f17

URL: https://github.com/llvm/llvm-project/commit/65074179f26d9f2c7a8e74f136ed17adda144f17
DIFF: https://github.com/llvm/llvm-project/commit/65074179f26d9f2c7a8e74f136ed17adda144f17.diff

LOG: [mlir][sparse] make fusion for SDDMM more robust

Reviewed By: Peiming

Differential Revision: https://reviews.llvm.org/D139456

Added: 
    mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir

Modified: 
    mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index b7461ec3740c6..f8c3919be60e4 100644

--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -51,13 +51,15 @@ static bool isSparseTensor(OpOperand *op) {
 // Helper method to find zero/uninitialized allocation.
 static bool isAlloc(OpOperand *op, bool isZero) {
   Value val = op->get();
+  // Check allocation, with zero alloc when required.
   if (auto alloc = val.getDefiningOp<AllocTensorOp>()) {
     Value copy = alloc.getCopy();
     if (isZero)
       return copy && isZeroValue(copy);
     return !copy;
   }
-  return false;
+  // Last resort for zero alloc: the whole value is zero.
+  return isZero && isZeroValue(val);
 }
 
 // Helper to detect sampling operation.

diff  --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir
new file mode 100644
index 0000000000000..9e56f1cf2d7dd
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir
@@ -0,0 +1,102 @@
+// RUN: mlir-opt %s  --pre-sparsification-rewrite --sparsification --cse | FileCheck %s
+
+#SM = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
+
+#trait_matmul = {
+  indexing_maps = [
+    affine_map<(d0, d1, d2) -> (d1, d0)>,
+    affine_map<(d0, d1, d2) -> (d0, d2)>,
+    affine_map<(d0, d1, d2) -> (d1, d2)>
+  ],
+  iterator_types = ["reduction", "parallel", "parallel"]
+}
+
+#trait_scale = {
+  indexing_maps = [
+    affine_map<(d0, d1) -> (d0, d1)>,
+    affine_map<(d0, d1) -> (d0, d1)>,
+    affine_map<(d0, d1) -> (d0, d1)>
+  ],
+  iterator_types = ["parallel", "parallel"]
+}
+
+// CHECK-LABEL:   func.func @sparse_sampled_dd_unfused(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>>,
+// CHECK-SAME:      %[[VAL_1:.*]]: tensor<8x8xf64>,
+// CHECK-SAME:      %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> {
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 8 : index
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant false
+// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant true
+// CHECK:           %[[VAL_8:.*]] = bufferization.alloc_tensor() : tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>>
+// CHECK:           %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64>
+// CHECK:           %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64>
+// CHECK:           %[[VAL_11:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK:           %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK:           %[[VAL_13:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK:           %[[VAL_14:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK:           %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref<?xf64>
+// CHECK:           %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK:           %[[VAL_17:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref<?xindex>
+// CHECK:           %[[VAL_18:.*]] = scf.for %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] iter_args(%[[VAL_20:.*]] = %[[VAL_8]]) -> (tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>>) {
+// CHECK:             %[[VAL_21:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_19]]] : memref<?xindex>
+// CHECK:             %[[VAL_22:.*]], %[[VAL_23:.*]], %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.expand %[[VAL_8]] : tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref<?xf64>, memref<?xi1>, memref<?xindex>
+// CHECK:             %[[VAL_26:.*]] = scf.for %[[VAL_27:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] iter_args(%[[VAL_28:.*]] = %[[VAL_25]]) -> (index) {
+// CHECK:               %[[VAL_29:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_21]], %[[VAL_27]]] : memref<8x8xf64>
+// CHECK:               %[[VAL_30:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_19]]] : memref<?xindex>
+// CHECK:               %[[VAL_31:.*]] = arith.addi %[[VAL_19]], %[[VAL_5]] : index
+// CHECK:               %[[VAL_32:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_31]]] : memref<?xindex>
+// CHECK:               %[[VAL_33:.*]] = scf.for %[[VAL_34:.*]] = %[[VAL_30]] to %[[VAL_32]] step %[[VAL_5]] iter_args(%[[VAL_35:.*]] = %[[VAL_28]]) -> (index) {
+// CHECK:                 %[[VAL_36:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref<?xindex>
+// CHECK:                 %[[VAL_37:.*]] = memref.load %[[VAL_22]]{{\[}}%[[VAL_36]]] : memref<?xf64>
+// CHECK:                 %[[VAL_38:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_27]], %[[VAL_36]]] : memref<8x8xf64>
+// CHECK:                 %[[VAL_39:.*]] = arith.mulf %[[VAL_29]], %[[VAL_38]] : f64
+// CHECK:                 %[[VAL_40:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_34]]] : memref<?xf64>
+// CHECK:                 %[[VAL_41:.*]] = arith.mulf %[[VAL_39]], %[[VAL_40]] : f64
+// CHECK:                 %[[VAL_42:.*]] = arith.addf %[[VAL_37]], %[[VAL_41]] : f64
+// CHECK:                 %[[VAL_43:.*]] = memref.load %[[VAL_23]]{{\[}}%[[VAL_36]]] : memref<?xi1>
+// CHECK:                 %[[VAL_44:.*]] = arith.cmpi eq, %[[VAL_43]], %[[VAL_6]] : i1
+// CHECK:                 %[[VAL_45:.*]] = scf.if %[[VAL_44]] -> (index) {
+// CHECK:                   memref.store %[[VAL_7]], %[[VAL_23]]{{\[}}%[[VAL_36]]] : memref<?xi1>
+// CHECK:                   memref.store %[[VAL_36]], %[[VAL_24]]{{\[}}%[[VAL_35]]] : memref<?xindex>
+// CHECK:                   %[[VAL_46:.*]] = arith.addi %[[VAL_35]], %[[VAL_5]] : index
+// CHECK:                   scf.yield %[[VAL_46]] : index
+// CHECK:                 } else {
+// CHECK:                   scf.yield %[[VAL_35]] : index
+// CHECK:                 }
+// CHECK:                 memref.store %[[VAL_42]], %[[VAL_22]]{{\[}}%[[VAL_36]]] : memref<?xf64>
+// CHECK:                 scf.yield %[[VAL_47:.*]] : index
+// CHECK:               } {"Emitted from" = "linalg.generic"}
+// CHECK:               scf.yield %[[VAL_48:.*]] : index
+// CHECK:             } {"Emitted from" = "linalg.generic"}
+// CHECK:             %[[VAL_49:.*]] = sparse_tensor.compress %[[VAL_22]], %[[VAL_23]], %[[VAL_24]], %[[VAL_50:.*]] into %[[VAL_20]]{{\[}}%[[VAL_21]]] : memref<?xf64>, memref<?xi1>, memref<?xindex>, tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>>
+// CHECK:             scf.yield %[[VAL_49]] : tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>>
+// CHECK:           } {"Emitted from" = "linalg.generic"}
+// CHECK:           %[[VAL_51:.*]] = sparse_tensor.load %[[VAL_52:.*]] hasInserts : tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>>
+// CHECK:           return %[[VAL_51]] : tensor<8x8xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>>
+// CHECK:         }
+func.func @sparse_sampled_dd_unfused(%args: tensor<8x8xf64, #SM>,
+                                     %arga: tensor<8x8xf64>,
+                                     %argb: tensor<8x8xf64>) -> tensor<8x8xf64, #SM> {
+  // Perform dense-dense matrix matrix multiplication.
+  %1 = arith.constant dense<0.0> : tensor<8x8xf64>
+  %2 = linalg.generic #trait_matmul
+    ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>)
+    outs(%1 : tensor<8x8xf64>) {
+      ^bb0(%a: f64, %b: f64, %x: f64):
+        %p = arith.mulf %a, %b : f64
+        %q = arith.addf %x, %p : f64
+        linalg.yield %q : f64
+  } -> tensor<8x8xf64>
+  // Sample the result with elements-wise multiplication with sparse matrix.
+  %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM>
+  %4 = linalg.generic #trait_scale
+    ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>)
+    outs(%3 : tensor<8x8xf64, #SM>) {
+      ^bb0(%t: f64, %s: f64, %x: f64):
+        %r = arith.mulf %t, %s : f64
+        linalg.yield %r : f64
+  } -> tensor<8x8xf64, #SM>
+  return %4 : tensor<8x8xf64, #SM>
+}