[Mlir-commits] [mlir] af42550 - [mlir][sparse] refine optimization, add few more test cases

Aart Bik llvmlistbot at llvm.org
Fri Nov 20 17:02:16 PST 2020


Author: Aart Bik
Date: 2020-11-20T17:01:59-08:00
New Revision: af42550523d9feef72e6838d5b4e553ef4af79f9

URL: https://github.com/llvm/llvm-project/commit/af42550523d9feef72e6838d5b4e553ef4af79f9
DIFF: https://github.com/llvm/llvm-project/commit/af42550523d9feef72e6838d5b4e553ef4af79f9.diff

LOG: [mlir][sparse] refine optimization, add few more test cases

Adds tests for full sum reduction (tensors summed up into scalars)
and for the well-known sampled dense-dense matrix product. Refines
the optimization rules slightly to handle the summation better.
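
For reference, a minimal dense sketch of the sampled kernel exercised by
the new 2-d test, X(i,j) += S(i,j) * SUM_k A(i,k) * B(k,j); the bounds
m, n, p and the arrays are hypothetical stand-ins, and the generated
sparse code instead only visits the nonzeros of the sampling matrix S:

    // Dense reference semantics for the sampled-dense-dense test below.
    for (unsigned i = 0; i < m; i++)
      for (unsigned j = 0; j < n; j++) {
        float sum = 0;
        for (unsigned k = 0; k < p; k++)
          sum += a[i][k] * b[k][j];   // dense-dense product
        x[i][j] += s[i][j] * sum;     // sampled by S
      }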

Reviewed By: penpornk

Differential Revision: https://reviews.llvm.org/D91818

Added: 
    

Modified: 
    mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
    mlir/test/Dialect/Linalg/sparse_1d.mlir
    mlir/test/Dialect/Linalg/sparse_2d.mlir
    mlir/test/Dialect/Linalg/sparse_3d.mlir

Removed: 
    


################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
index f449ed3c3343..caef07743fa1 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
@@ -140,7 +140,9 @@ class Merger {
     return s;
   }
 
-  /// Optimizes the iteration lattice points in the given set.
+  /// Optimizes the iteration lattice points in the given set. This
+  /// method should be called right before code generation to avoid
+  /// generating redundant loops and conditions.
   unsigned optimize(unsigned s0) {
     unsigned s = addSet();
     assert(latSets[s0].size() != 0);
@@ -148,15 +150,21 @@ class Merger {
     for (unsigned p1 : latSets[s0]) {
       bool add = true;
       if (p0 != p1) {
+        // Is this a straightforward copy?
+        unsigned e = latPoints[p1].exp;
+        if (exp(e).kind == Kind::kTensor && exp(e).e0 == numTensors - 1)
+          continue;
+        // Is any dense index exhausted?
         llvm::BitVector tmp = latPoints[p1].bits;
         tmp ^= latPoints[p0].bits;
         if (hasAnyOf(tmp, false))
-          continue; // dense exhausted?
+          continue;
+        // Is this a direct duplication of an earlier conjunction?
         for (unsigned p2 : latSets[s]) {
           tmp = latPoints[p1].bits;
           tmp ^= latPoints[p2].bits;
           if (tmp.count() == 0) {
-            add = false; // direct dup?
+            add = false;
             break;
           }
         }

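
In summary, optimize() now applies three pruning rules right before code
generation; the following sketch (illustrative only, not part of the
patch) restates them using the same BitVector idiom as the hunk above:

    // Rule 1: skip a lattice point whose expression is just the output
    //         tensor (tensor index numTensors - 1); emitting it would
    //         be a plain copy.
    // Rule 2: skip a point whose bits differ from the first point p0 in
    //         dense dimensions only; its iterations are already covered
    //         once the dense indices are exhausted.
    // Rule 3: skip a point that is bitwise identical to an already
    //         accepted point p2; it is a direct duplicate.
    llvm::BitVector tmp = latPoints[p1].bits;
    tmp ^= latPoints[p2].bits;             // Rule 3: compare against p2
    bool duplicate = (tmp.count() == 0);   // empty XOR => same conditions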
diff --git a/mlir/test/Dialect/Linalg/sparse_1d.mlir b/mlir/test/Dialect/Linalg/sparse_1d.mlir
index b0f8f8d5c676..e20cdbd62d64 100644
--- a/mlir/test/Dialect/Linalg/sparse_1d.mlir
+++ b/mlir/test/Dialect/Linalg/sparse_1d.mlir
@@ -635,3 +635,48 @@ func @mul_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
   } -> tensor<32xf32>
   return %0 : tensor<32xf32>
 }
+
+#trait_sum_reduction = {
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a
+    affine_map<(i) -> ()>    // x (scalar out)
+  ],
+  sparse = [
+    [ "S" ],  // a
+    [  ]      // x
+  ],
+  iterator_types = ["reduction"],
+  doc = "x = SUM_i a(i)"
+}
+
+// CHECK-LABEL:   func @sum_reduction(
+// CHECK-SAME:                        %[[VAL_0:.*]]: tensor<?xf32>,
+// CHECK-SAME:                        %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
+// CHECK:           %[[VAL_2:.*]] = constant 999 : index
+// CHECK:           %[[VAL_3:.*]] = constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK:           %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK:           %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
+// CHECK:           %[[VAL_8:.*]] = alloca() : memref<f32>
+// CHECK:           %[[VAL_9:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK:           %[[VAL_10:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK:           scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] {
+// CHECK:             %[[VAL_12:.*]] = load %[[VAL_8]][] : memref<f32>
+// CHECK:             %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<?xf32>
+// CHECK:             %[[VAL_14:.*]] = addf %[[VAL_12]], %[[VAL_13]] : f32
+// CHECK:             store %[[VAL_14]], %[[VAL_8]][] : memref<f32>
+// CHECK:           }
+// CHECK:           %[[VAL_15:.*]] = tensor_load %[[VAL_8]] : memref<f32>
+// CHECK:           return %[[VAL_15]] : tensor<f32>
+// CHECK:         }
+func @sum_reduction(%arga: tensor<?xf32>, %argx: tensor<f32>) -> tensor<f32> {
+  %0 = linalg.generic #trait_sum_reduction
+    ins(%arga : tensor<?xf32>)
+    init(%argx : tensor<f32>) {
+      ^bb(%a : f32, %x : f32):
+        %0 = addf %x, %a  : f32
+        linalg.yield %0: f32
+  } -> tensor<f32>
+  return %0 : tensor<f32>
+}

diff --git a/mlir/test/Dialect/Linalg/sparse_2d.mlir b/mlir/test/Dialect/Linalg/sparse_2d.mlir
index 38d55ef69c4e..a4692be29c89 100644
--- a/mlir/test/Dialect/Linalg/sparse_2d.mlir
+++ b/mlir/test/Dialect/Linalg/sparse_2d.mlir
@@ -1056,3 +1056,131 @@ func @matvec(%argA: tensor<16x32xf32>, %argb: tensor<32xf32>, %argx: tensor<16xf
   } -> tensor<16xf32>
   return %0 : tensor<16xf32>
 }
+
+#trait_sum_reduction = {
+  indexing_maps = [
+    affine_map<(i,j) -> (i,j)>,  // a
+    affine_map<(i,j) -> ()>      // x (scalar out)
+  ],
+  sparse = [
+    [ "D","S" ],  // a
+    [ ]           // x
+  ],
+  iterator_types = ["reduction", "reduction"],
+  doc = "x = SUM_ij a(i,j)"
+}
+
+// CHECK-LABEL:   func @sum_reduction(
+// CHECK-SAME:                %[[VAL_0:.*]]: tensor<10x20xf32>,
+// CHECK-SAME:                %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
+// CHECK:           %[[VAL_2:.*]] = constant 999 : index
+// CHECK:           %[[VAL_3:.*]] = constant 10 : index
+// CHECK:           %[[VAL_4:.*]] = constant 0 : index
+// CHECK:           %[[VAL_5:.*]] = constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK:           %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK:           %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
+// CHECK:           %[[VAL_9:.*]] = alloca() : memref<f32>
+// CHECK:           scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
+// CHECK:             %[[VAL_11:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xindex>
+// CHECK:             %[[VAL_12:.*]] = addi %[[VAL_10]], %[[VAL_5]] : index
+// CHECK:             %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref<?xindex>
+// CHECK:             scf.for %[[VAL_14:.*]] = %[[VAL_11]] to %[[VAL_13]] step %[[VAL_5]] {
+// CHECK:               %[[VAL_15:.*]] = load %[[VAL_9]][] : memref<f32>
+// CHECK:               %[[VAL_16:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref<?xf32>
+// CHECK:               %[[VAL_17:.*]] = addf %[[VAL_15]], %[[VAL_16]] : f32
+// CHECK:               store %[[VAL_17]], %[[VAL_9]][] : memref<f32>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           %[[VAL_18:.*]] = tensor_load %[[VAL_9]] : memref<f32>
+// CHECK:           return %[[VAL_18]] : tensor<f32>
+// CHECK:         }
+func @sum_reduction(%arga: tensor<10x20xf32>, %argx: tensor<f32>) -> tensor<f32> {
+  %0 = linalg.generic #trait_sum_reduction
+    ins(%arga : tensor<10x20xf32>)
+    init(%argx : tensor<f32>) {
+      ^bb(%a : f32, %x : f32):
+        %0 = addf %x, %a  : f32
+        linalg.yield %0: f32
+  } -> tensor<f32>
+  return %0 : tensor<f32>
+}
+
+#trait_sampled_dense_dense = {
+  indexing_maps = [
+    affine_map<(i,j,k) -> (i,j)>,  // S
+    affine_map<(i,j,k) -> (i,k)>,  // A
+    affine_map<(i,j,k) -> (k,j)>,  // B
+    affine_map<(i,j,k) -> (i,j)>   // X (out)
+  ],
+  sparse = [
+    [ "S", "S" ],  // S
+    [ "D", "D" ],  // A
+    [ "D", "D" ],  // B
+    [ "D", "D" ]   // X
+  ],
+  iterator_types = ["parallel", "parallel", "reduction"],
+  doc = "X(i,j) = S(i,j) SUM_k A(i,k) B(k,j)"
+}
+
+// CHECK-LABEL:   func @sampled_dense_dense(
+// CHECK-SAME:                              %[[VAL_0:.*0]]: tensor<?x?xf32>,
+// CHECK-SAME:                              %[[VAL_1:.*1]]: tensor<?x?xf32>,
+// CHECK-SAME:                              %[[VAL_2:.*2]]: tensor<?x?xf32>,
+// CHECK-SAME:                              %[[VAL_3:.*3]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
+// CHECK:           %[[VAL_4:.*]] = constant 999 : index
+// CHECK:           %[[VAL_5:.*]] = constant 0 : index
+// CHECK:           %[[VAL_6:.*]] = constant 1 : index
+// CHECK:           %[[VAL_7:.*]] = alloca(%[[VAL_4]]) : memref<?xindex>
+// CHECK:           %[[VAL_8:.*]] = alloca(%[[VAL_4]]) : memref<?xindex>
+// CHECK:           %[[VAL_9:.*]] = alloca(%[[VAL_4]]) : memref<?xindex>
+// CHECK:           %[[VAL_10:.*]] = alloca(%[[VAL_4]]) : memref<?xindex>
+// CHECK:           %[[VAL_11:.*]] = alloca(%[[VAL_4]]) : memref<?xf32>
+// CHECK:           %[[VAL_12:.*]] = dim %[[VAL_1]], %[[VAL_5]] : tensor<?x?xf32>
+// CHECK:           %[[VAL_13:.*]] = dim %[[VAL_1]], %[[VAL_6]] : tensor<?x?xf32>
+// CHECK:           %[[VAL_14:.*]] = alloca(%[[VAL_12]], %[[VAL_13]]) : memref<?x?xf32>
+// CHECK:           %[[VAL_15:.*]] = dim %[[VAL_2]], %[[VAL_5]] : tensor<?x?xf32>
+// CHECK:           %[[VAL_16:.*]] = dim %[[VAL_2]], %[[VAL_6]] : tensor<?x?xf32>
+// CHECK:           %[[VAL_17:.*]] = alloca(%[[VAL_15]], %[[VAL_16]]) : memref<?x?xf32>
+// CHECK:           %[[VAL_18:.*]] = dim %[[VAL_3]], %[[VAL_5]] : tensor<?x?xf32>
+// CHECK:           %[[VAL_19:.*]] = dim %[[VAL_3]], %[[VAL_6]] : tensor<?x?xf32>
+// CHECK:           %[[VAL_20:.*]] = alloca(%[[VAL_18]], %[[VAL_19]]) : memref<?x?xf32>
+// CHECK:           %[[VAL_21:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref<?xindex>
+// CHECK:           %[[VAL_22:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
+// CHECK:           scf.for %[[VAL_23:.*]] = %[[VAL_21]] to %[[VAL_22]] step %[[VAL_6]] {
+// CHECK:             %[[VAL_24:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_23]]] : memref<?xindex>
+// CHECK:             scf.for %[[VAL_25:.*]] = %[[VAL_5]] to %[[VAL_15]] step %[[VAL_6]] {
+// CHECK:               %[[VAL_26:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_23]]] : memref<?xindex>
+// CHECK:               %[[VAL_27:.*]] = addi %[[VAL_23]], %[[VAL_6]] : index
+// CHECK:               %[[VAL_28:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_27]]] : memref<?xindex>
+// CHECK:               scf.for %[[VAL_29:.*]] = %[[VAL_26]] to %[[VAL_28]] step %[[VAL_6]] {
+// CHECK:                 %[[VAL_30:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_29]]] : memref<?xindex>
+// CHECK:                 %[[VAL_31:.*]] = load %[[VAL_20]]{{\[}}%[[VAL_24]], %[[VAL_30]]] : memref<?x?xf32>
+// CHECK:                 %[[VAL_32:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_29]]] : memref<?xf32>
+// CHECK:                 %[[VAL_33:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_24]], %[[VAL_25]]] : memref<?x?xf32>
+// CHECK:                 %[[VAL_34:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_30]]] : memref<?x?xf32>
+// CHECK:                 %[[VAL_35:.*]] = mulf %[[VAL_33]], %[[VAL_34]] : f32
+// CHECK:                 %[[VAL_36:.*]] = mulf %[[VAL_32]], %[[VAL_35]] : f32
+// CHECK:                 %[[VAL_37:.*]] = addf %[[VAL_31]], %[[VAL_36]] : f32
+// CHECK:                 store %[[VAL_37]], %[[VAL_20]]{{\[}}%[[VAL_24]], %[[VAL_30]]] : memref<?x?xf32>
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }
+// CHECK:           %[[VAL_38:.*]] = tensor_load %[[VAL_20]] : memref<?x?xf32>
+// CHECK:           return %[[VAL_38]] : tensor<?x?xf32>
+// CHECK:         }
+func @sampled_dense_dense(%args: tensor<?x?xf32>,
+                          %arga: tensor<?x?xf32>,
+                          %argb: tensor<?x?xf32>,
+			  %argx: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %0 = linalg.generic #trait_sampled_dense_dense
+    ins(%args, %arga, %argb : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
+    init(%argx : tensor<?x?xf32>) {
+      ^bb(%s : f32, %a : f32, %b : f32, %x : f32):
+        %0 = mulf %a, %b  : f32
+        %1 = mulf %s, %0  : f32
+        %2 = addf %x, %1  : f32
+        linalg.yield %2: f32
+  } -> tensor<?x?xf32>
+  return %0 : tensor<?x?xf32>
+}

diff --git a/mlir/test/Dialect/Linalg/sparse_3d.mlir b/mlir/test/Dialect/Linalg/sparse_3d.mlir
index a253ea4557c3..a6794f2e6487 100644
--- a/mlir/test/Dialect/Linalg/sparse_3d.mlir
+++ b/mlir/test/Dialect/Linalg/sparse_3d.mlir
@@ -1223,3 +1223,61 @@ func @kernel_3d(%arga: tensor<?x?xf32>,
   } -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }
+#trait_sum_reduction = {
+  indexing_maps = [
+    affine_map<(i,j,k) -> (i,j,k)>,  // a
+    affine_map<(i,j,k) -> ()>        // x (scalar out)
+  ],
+  sparse = [
+    [ "S", "S", "S" ],  // a
+    [ ]                 // x
+  ],
+  iterator_types = ["reduction", "reduction", "reduction"],
+  doc = "x = SUM_ijk a(i,j,k)"
+}
+
+// CHECK-LABEL:   func @sum_reduction(
+// CHECK-SAME:                        %[[VAL_0:.*]]: tensor<10x20x30xf32>,
+// CHECK-SAME:                        %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
+// CHECK:           %[[VAL_2:.*]] = constant 999 : index
+// CHECK:           %[[VAL_3:.*]] = constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK:           %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK:           %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK:           %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK:           %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK:           %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK:           %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
+// CHECK:           %[[VAL_12:.*]] = alloca() : memref<f32>
+// CHECK:           %[[VAL_13:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK:           %[[VAL_14:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK:           scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_4]] {
+// CHECK:             %[[VAL_16:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref<?xindex>
+// CHECK:             %[[VAL_17:.*]] = addi %[[VAL_15]], %[[VAL_4]] : index
+// CHECK:             %[[VAL_18:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref<?xindex>
+// CHECK:             scf.for %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_18]] step %[[VAL_4]] {
+// CHECK:               %[[VAL_20:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref<?xindex>
+// CHECK:               %[[VAL_21:.*]] = addi %[[VAL_19]], %[[VAL_4]] : index
+// CHECK:               %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref<?xindex>
+// CHECK:               scf.for %[[VAL_23:.*]] = %[[VAL_20]] to %[[VAL_22]] step %[[VAL_4]] {
+// CHECK:                 %[[VAL_24:.*]] = load %[[VAL_12]][] : memref<f32>
+// CHECK:                 %[[VAL_25:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref<?xf32>
+// CHECK:                 %[[VAL_26:.*]] = addf %[[VAL_24]], %[[VAL_25]] : f32
+// CHECK:                 store %[[VAL_26]], %[[VAL_12]][] : memref<f32>
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }
+// CHECK:           %[[VAL_27:.*]] = tensor_load %[[VAL_12]] : memref<f32>
+// CHECK:           return %[[VAL_27]] : tensor<f32>
+// CHECK:         }
+func @sum_reduction(%arga: tensor<10x20x30xf32>, %argx: tensor<f32>) -> tensor<f32> {
+  %0 = linalg.generic #trait_sum_reduction
+    ins(%arga : tensor<10x20x30xf32>)
+    init(%argx : tensor<f32>) {
+      ^bb(%a : f32, %x : f32):
+        %0 = addf %x, %a  : f32
+        linalg.yield %0: f32
+  } -> tensor<f32>
+  return %0 : tensor<f32>
+}