[Mlir-commits] [mlir] af42550 - [mlir][sparse] refine optimization, add a few more test cases
Aart Bik
llvmlistbot at llvm.org
Fri Nov 20 17:02:16 PST 2020
Author: Aart Bik
Date: 2020-11-20T17:01:59-08:00
New Revision: af42550523d9feef72e6838d5b4e553ef4af79f9
URL: https://github.com/llvm/llvm-project/commit/af42550523d9feef72e6838d5b4e553ef4af79f9
DIFF: https://github.com/llvm/llvm-project/commit/af42550523d9feef72e6838d5b4e553ef4af79f9.diff
LOG: [mlir][sparse] refine optimization, add a few more test cases
Adds tests for full sum reduction (tensors summed up into scalars)
and the well-known sampled dense-dense matrix product (SDDMM).
Refines the optimization rules slightly to handle the summation better.
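
For reference, the two kernels exercised by the new tests have the
following dense semantics; this is a minimal C++ sketch for orientation
only (names and shapes are illustrative, not part of the patch):

#include <vector>

// x = SUM_i a(i): full sum reduction of a 1-D tensor into a scalar.
float sumReduction(const std::vector<float> &a) {
  float x = 0.0f;
  for (float v : a)
    x += v;
  return x;
}

// X(i,j) = S(i,j) * SUM_k A(i,k) * B(k,j): the sampled dense-dense
// matrix product, where the sampling matrix S scales (and, when S is
// sparse, restricts) the entries of the dense product A*B.
void sampledDenseDense(int m, int n, int p,
                       const std::vector<std::vector<float>> &S,
                       const std::vector<std::vector<float>> &A,
                       const std::vector<std::vector<float>> &B,
                       std::vector<std::vector<float>> &X) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float sum = 0.0f;
      for (int k = 0; k < p; ++k)
        sum += A[i][k] * B[k][j];
      X[i][j] += S[i][j] * sum;
    }
}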
Reviewed By: penpornk
Differential Revision: https://reviews.llvm.org/D91818
Added:
Modified:
mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
mlir/test/Dialect/Linalg/sparse_1d.mlir
mlir/test/Dialect/Linalg/sparse_2d.mlir
mlir/test/Dialect/Linalg/sparse_3d.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
index f449ed3c3343..caef07743fa1 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Sparsification.cpp
@@ -140,7 +140,9 @@ class Merger {
return s;
}
- /// Optimizes the iteration lattice points in the given set.
+ /// Optimizes the iteration lattice points in the given set. This
+ /// method should be called right before code generation to avoid
+ /// generating redundant loops and conditions.
unsigned optimize(unsigned s0) {
unsigned s = addSet();
assert(latSets[s0].size() != 0);
@@ -148,15 +150,21 @@ class Merger {
for (unsigned p1 : latSets[s0]) {
bool add = true;
if (p0 != p1) {
+ // Is this a straightforward copy?
+ unsigned e = latPoints[p1].exp;
+ if (exp(e).kind == Kind::kTensor && exp(e).e0 == numTensors - 1)
+ continue;
+ // Is any dense index exhausted?
llvm::BitVector tmp = latPoints[p1].bits;
tmp ^= latPoints[p0].bits;
if (hasAnyOf(tmp, false))
- continue; // dense exhausted?
+ continue;
+ // Is this a direct duplication of an earlier conjunction?
for (unsigned p2 : latSets[s]) {
tmp = latPoints[p1].bits;
tmp ^= latPoints[p2].bits;
if (tmp.count() == 0) {
- add = false; // direct dup?
+ add = false;
break;
}
}
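
The refined optimize() above prunes a candidate lattice point for one of
three reasons: its expression is a straightforward copy of the output
tensor (a redundant x = x), its XOR with the first point only exhausts
dense indices, or it duplicates a conjunction that was already kept.
A minimal standalone sketch of the duplicate test, using llvm::BitVector
as the patch does (the helper itself is illustrative, not part of the
patch):

#include "llvm/ADT/BitVector.h"

// Two lattice points are direct duplicates when they involve exactly
// the same set of loop-index bits: XOR-ing the bit vectors leaves
// nothing set.
static bool isDirectDup(const llvm::BitVector &p1Bits,
                        const llvm::BitVector &p2Bits) {
  llvm::BitVector tmp = p1Bits; // copy so neither input is clobbered
  tmp ^= p2Bits;                // bits where the two points differ
  return tmp.count() == 0;      // no difference: duplicate conjunction
}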
diff --git a/mlir/test/Dialect/Linalg/sparse_1d.mlir b/mlir/test/Dialect/Linalg/sparse_1d.mlir
index b0f8f8d5c676..e20cdbd62d64 100644
--- a/mlir/test/Dialect/Linalg/sparse_1d.mlir
+++ b/mlir/test/Dialect/Linalg/sparse_1d.mlir
@@ -635,3 +635,48 @@ func @mul_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
} -> tensor<32xf32>
return %0 : tensor<32xf32>
}
+
+#trait_sum_reduction = {
+ indexing_maps = [
+ affine_map<(i) -> (i)>, // a
+ affine_map<(i) -> ()> // x (scalar out)
+ ],
+ sparse = [
+ [ "S" ], // a
+ [ ] // x
+ ],
+ iterator_types = ["reduction"],
+ doc = "x = SUM_i a(i)"
+}
+
+// CHECK-LABEL: func @sum_reduction(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
+// CHECK: %[[VAL_2:.*]] = constant 999 : index
+// CHECK: %[[VAL_3:.*]] = constant 0 : index
+// CHECK: %[[VAL_4:.*]] = constant 1 : index
+// CHECK: %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
+// CHECK: %[[VAL_8:.*]] = alloca() : memref<f32>
+// CHECK: %[[VAL_9:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] {
+// CHECK: %[[VAL_12:.*]] = load %[[VAL_8]][] : memref<f32>
+// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<?xf32>
+// CHECK: %[[VAL_14:.*]] = addf %[[VAL_12]], %[[VAL_13]] : f32
+// CHECK: store %[[VAL_14]], %[[VAL_8]][] : memref<f32>
+// CHECK: }
+// CHECK: %[[VAL_15:.*]] = tensor_load %[[VAL_8]] : memref<f32>
+// CHECK: return %[[VAL_15]] : tensor<f32>
+// CHECK: }
+func @sum_reduction(%arga: tensor<?xf32>, %argx: tensor<f32>) -> tensor<f32> {
+ %0 = linalg.generic #trait_sum_reduction
+ ins(%arga : tensor<?xf32>)
+ init(%argx : tensor<f32>) {
+ ^bb(%a : f32, %x : f32):
+ %0 = addf %x, %a : f32
+ linalg.yield %0: f32
+ } -> tensor<f32>
+ return %0 : tensor<f32>
+}
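
The loop generated above is the classic compressed-vector reduction:
positions [0] and [1] of the pointer array bound the stored entries, and
only those values are summed. A plain C++ equivalent, with the arrays
mirroring the alloca'd memrefs (names are illustrative, and the
accumulator starts at zero for simplicity rather than being read from
the init tensor):

#include <vector>

// x = SUM_i a(i) over a compressed 1-D tensor: pos[0]..pos[1] bounds
// the stored entries and vals holds their values; the index array is
// not needed for a full reduction.
float sparseSumReduction(const std::vector<int> &pos,
                         const std::vector<float> &vals) {
  float x = 0.0f;
  for (int p = pos[0]; p < pos[1]; ++p)
    x += vals[p];
  return x;
}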
diff --git a/mlir/test/Dialect/Linalg/sparse_2d.mlir b/mlir/test/Dialect/Linalg/sparse_2d.mlir
index 38d55ef69c4e..a4692be29c89 100644
--- a/mlir/test/Dialect/Linalg/sparse_2d.mlir
+++ b/mlir/test/Dialect/Linalg/sparse_2d.mlir
@@ -1056,3 +1056,131 @@ func @matvec(%argA: tensor<16x32xf32>, %argb: tensor<32xf32>, %argx: tensor<16xf
} -> tensor<16xf32>
return %0 : tensor<16xf32>
}
+
+#trait_sum_reduction = {
+ indexing_maps = [
+ affine_map<(i,j) -> (i,j)>, // a
+ affine_map<(i,j) -> ()> // x (scalar out)
+ ],
+ sparse = [
+ [ "D","S" ], // a
+ [ ] // x
+ ],
+ iterator_types = ["reduction", "reduction"],
+ doc = "x = SUM_ij a(i,j)"
+}
+
+// CHECK-LABEL: func @sum_reduction(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
+// CHECK: %[[VAL_2:.*]] = constant 999 : index
+// CHECK: %[[VAL_3:.*]] = constant 10 : index
+// CHECK: %[[VAL_4:.*]] = constant 0 : index
+// CHECK: %[[VAL_5:.*]] = constant 1 : index
+// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
+// CHECK: %[[VAL_9:.*]] = alloca() : memref<f32>
+// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
+// CHECK: %[[VAL_11:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xindex>
+// CHECK: %[[VAL_12:.*]] = addi %[[VAL_10]], %[[VAL_5]] : index
+// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_11]] to %[[VAL_13]] step %[[VAL_5]] {
+// CHECK: %[[VAL_15:.*]] = load %[[VAL_9]][] : memref<f32>
+// CHECK: %[[VAL_16:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref<?xf32>
+// CHECK: %[[VAL_17:.*]] = addf %[[VAL_15]], %[[VAL_16]] : f32
+// CHECK: store %[[VAL_17]], %[[VAL_9]][] : memref<f32>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_18:.*]] = tensor_load %[[VAL_9]] : memref<f32>
+// CHECK: return %[[VAL_18]] : tensor<f32>
+// CHECK: }
+func @sum_reduction(%arga: tensor<10x20xf32>, %argx: tensor<f32>) -> tensor<f32> {
+ %0 = linalg.generic #trait_sum_reduction
+ ins(%arga : tensor<10x20xf32>)
+ init(%argx : tensor<f32>) {
+ ^bb(%a : f32, %x : f32):
+ %0 = addf %x, %a : f32
+ linalg.yield %0: f32
+ } -> tensor<f32>
+ return %0 : tensor<f32>
+}
+
+#trait_sampled_dense_dense = {
+ indexing_maps = [
+ affine_map<(i,j,k) -> (i,j)>, // S
+ affine_map<(i,j,k) -> (i,k)>, // A
+ affine_map<(i,j,k) -> (k,j)>, // B
+ affine_map<(i,j,k) -> (i,j)> // X (out)
+ ],
+ sparse = [
+ [ "S", "S" ], // S
+ [ "D", "D" ], // A
+ [ "D", "D" ], // B
+ [ "D", "D" ] // X
+ ],
+ iterator_types = ["parallel", "parallel", "reduction"],
+ doc = "X(i,j) = S(i,j) SUM_k A(i,k) B(k,j)"
+}
+
+// CHECK-LABEL: func @sampled_dense_dense(
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[VAL_1:.*1]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[VAL_2:.*2]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[VAL_3:.*3]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
+// CHECK: %[[VAL_4:.*]] = constant 999 : index
+// CHECK: %[[VAL_5:.*]] = constant 0 : index
+// CHECK: %[[VAL_6:.*]] = constant 1 : index
+// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_4]]) : memref<?xindex>
+// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_4]]) : memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_4]]) : memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_4]]) : memref<?xindex>
+// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_4]]) : memref<?xf32>
+// CHECK: %[[VAL_12:.*]] = dim %[[VAL_1]], %[[VAL_5]] : tensor<?x?xf32>
+// CHECK: %[[VAL_13:.*]] = dim %[[VAL_1]], %[[VAL_6]] : tensor<?x?xf32>
+// CHECK: %[[VAL_14:.*]] = alloca(%[[VAL_12]], %[[VAL_13]]) : memref<?x?xf32>
+// CHECK: %[[VAL_15:.*]] = dim %[[VAL_2]], %[[VAL_5]] : tensor<?x?xf32>
+// CHECK: %[[VAL_16:.*]] = dim %[[VAL_2]], %[[VAL_6]] : tensor<?x?xf32>
+// CHECK: %[[VAL_17:.*]] = alloca(%[[VAL_15]], %[[VAL_16]]) : memref<?x?xf32>
+// CHECK: %[[VAL_18:.*]] = dim %[[VAL_3]], %[[VAL_5]] : tensor<?x?xf32>
+// CHECK: %[[VAL_19:.*]] = dim %[[VAL_3]], %[[VAL_6]] : tensor<?x?xf32>
+// CHECK: %[[VAL_20:.*]] = alloca(%[[VAL_18]], %[[VAL_19]]) : memref<?x?xf32>
+// CHECK: %[[VAL_21:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref<?xindex>
+// CHECK: %[[VAL_22:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_23:.*]] = %[[VAL_21]] to %[[VAL_22]] step %[[VAL_6]] {
+// CHECK: %[[VAL_24:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_23]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_25:.*]] = %[[VAL_5]] to %[[VAL_15]] step %[[VAL_6]] {
+// CHECK: %[[VAL_26:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_23]]] : memref<?xindex>
+// CHECK: %[[VAL_27:.*]] = addi %[[VAL_23]], %[[VAL_6]] : index
+// CHECK: %[[VAL_28:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_27]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_29:.*]] = %[[VAL_26]] to %[[VAL_28]] step %[[VAL_6]] {
+// CHECK: %[[VAL_30:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_29]]] : memref<?xindex>
+// CHECK: %[[VAL_31:.*]] = load %[[VAL_20]]{{\[}}%[[VAL_24]], %[[VAL_30]]] : memref<?x?xf32>
+// CHECK: %[[VAL_32:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_29]]] : memref<?xf32>
+// CHECK: %[[VAL_33:.*]] = load %[[VAL_14]]{{\[}}%[[VAL_24]], %[[VAL_25]]] : memref<?x?xf32>
+// CHECK: %[[VAL_34:.*]] = load %[[VAL_17]]{{\[}}%[[VAL_25]], %[[VAL_30]]] : memref<?x?xf32>
+// CHECK: %[[VAL_35:.*]] = mulf %[[VAL_33]], %[[VAL_34]] : f32
+// CHECK: %[[VAL_36:.*]] = mulf %[[VAL_32]], %[[VAL_35]] : f32
+// CHECK: %[[VAL_37:.*]] = addf %[[VAL_31]], %[[VAL_36]] : f32
+// CHECK: store %[[VAL_37]], %[[VAL_20]]{{\[}}%[[VAL_24]], %[[VAL_30]]] : memref<?x?xf32>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_38:.*]] = tensor_load %[[VAL_20]] : memref<?x?xf32>
+// CHECK: return %[[VAL_38]] : tensor<?x?xf32>
+// CHECK: }
+func @sampled_dense_dense(%args: tensor<?x?xf32>,
+ %arga: tensor<?x?xf32>,
+ %argb: tensor<?x?xf32>,
+ %argx: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = linalg.generic #trait_sampled_dense_dense
+ ins(%args, %arga, %argb : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
+ init(%argx : tensor<?x?xf32>) {
+ ^bb(%s : f32, %a : f32, %b : f32, %x : f32):
+ %0 = mulf %a, %b : f32
+ %1 = mulf %s, %0 : f32
+ %2 = addf %x, %1 : f32
+ linalg.yield %2: f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
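
In the generated SDDMM code above, the sampling matrix S drives the loop
structure: the outer loops walk S's stored entries while A and B are
indexed densely, and the k loop sits between the row loop and the
per-row entry loop. A rough C++ analogue over a CSR-like layout (field
names are illustrative; for brevity the sketch walks the row dimension
densely, whereas the "S","S" annotation makes the generated code walk it
sparsely as well):

#include <vector>

// CSR-like storage for the sparse sampling matrix S (illustrative).
struct CSR {
  std::vector<int> rowPtr;   // size m+1; row i spans [rowPtr[i], rowPtr[i+1])
  std::vector<int> colInd;   // column index of each stored entry
  std::vector<float> values; // value of each stored entry
};

// X(i,j) += S(i,j) * A(i,k) * B(k,j), accumulated over k and computed
// only where S stores an entry -- the same shape as the scf.for nest
// in the CHECK lines above.
void sddmm(const CSR &S, int m, int p,
           const std::vector<std::vector<float>> &A,
           const std::vector<std::vector<float>> &B,
           std::vector<std::vector<float>> &X) {
  for (int i = 0; i < m; ++i)
    for (int k = 0; k < p; ++k)
      for (int q = S.rowPtr[i]; q < S.rowPtr[i + 1]; ++q) {
        int j = S.colInd[q];
        X[i][j] += S.values[q] * A[i][k] * B[k][j];
      }
}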
diff --git a/mlir/test/Dialect/Linalg/sparse_3d.mlir b/mlir/test/Dialect/Linalg/sparse_3d.mlir
index a253ea4557c3..a6794f2e6487 100644
--- a/mlir/test/Dialect/Linalg/sparse_3d.mlir
+++ b/mlir/test/Dialect/Linalg/sparse_3d.mlir
@@ -1223,3 +1223,61 @@ func @kernel_3d(%arga: tensor<?x?xf32>,
} -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
+#trait_sum_reduction = {
+ indexing_maps = [
+ affine_map<(i,j,k) -> (i,j,k)>, // a
+ affine_map<(i,j,k) -> ()> // x (scalar out)
+ ],
+ sparse = [
+ [ "S", "S", "S" ], // a
+ [ ] // x
+ ],
+ iterator_types = ["reduction", "reduction", "reduction"],
+ doc = "x = SUM_ijk a(i,j,k)"
+}
+
+// CHECK-LABEL: func @sum_reduction(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
+// CHECK: %[[VAL_2:.*]] = constant 999 : index
+// CHECK: %[[VAL_3:.*]] = constant 0 : index
+// CHECK: %[[VAL_4:.*]] = constant 1 : index
+// CHECK: %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
+// CHECK: %[[VAL_11:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
+// CHECK: %[[VAL_12:.*]] = alloca() : memref<f32>
+// CHECK: %[[VAL_13:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_4]] {
+// CHECK: %[[VAL_16:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref<?xindex>
+// CHECK: %[[VAL_17:.*]] = addi %[[VAL_15]], %[[VAL_4]] : index
+// CHECK: %[[VAL_18:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_18]] step %[[VAL_4]] {
+// CHECK: %[[VAL_20:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref<?xindex>
+// CHECK: %[[VAL_21:.*]] = addi %[[VAL_19]], %[[VAL_4]] : index
+// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_23:.*]] = %[[VAL_20]] to %[[VAL_22]] step %[[VAL_4]] {
+// CHECK: %[[VAL_24:.*]] = load %[[VAL_12]][] : memref<f32>
+// CHECK: %[[VAL_25:.*]] = load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref<?xf32>
+// CHECK: %[[VAL_26:.*]] = addf %[[VAL_24]], %[[VAL_25]] : f32
+// CHECK: store %[[VAL_26]], %[[VAL_12]][] : memref<f32>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_27:.*]] = tensor_load %[[VAL_12]] : memref<f32>
+// CHECK: return %[[VAL_27]] : tensor<f32>
+// CHECK: }
+func @sum_reduction(%arga: tensor<10x20x30xf32>, %argx: tensor<f32>) -> tensor<f32> {
+ %0 = linalg.generic #trait_sum_reduction
+ ins(%arga : tensor<10x20x30xf32>)
+ init(%argx : tensor<f32>) {
+ ^bb(%a : f32, %x : f32):
+ %0 = addf %x, %a : f32
+ linalg.yield %0: f32
+ } -> tensor<f32>
+ return %0 : tensor<f32>
+}