[Mlir-commits] [mlir] e2167d8 - [mlir][sparse] refine absent branch feeding into custom op
Aart Bik
llvmlistbot at llvm.org
Tue Jun 6 09:57:25 PDT 2023
Author: Aart Bik
Date: 2023-06-06T09:57:15-07:00
New Revision: e2167d89dba0a276ff756b80b5f4a1ebcb83a611
URL: https://github.com/llvm/llvm-project/commit/e2167d89dba0a276ff756b80b5f4a1ebcb83a611
DIFF: https://github.com/llvm/llvm-project/commit/e2167d89dba0a276ff756b80b5f4a1ebcb83a611.diff
LOG: [mlir][sparse] refine absent branch feeding into custom op
Document better that unary/binary results may only feed into the output
or into the input of a custom reduction (not even into a regular reduction,
since the result may have "no value"!). Also fixes a bug where an empty
present branch feeds into a custom reduction.
Reviewed By: Peiming
Differential Revision: https://reviews.llvm.org/D152224
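For readers skimming the diff, this is the pattern that both the documentation
change and the bug fix concern: a `sparse_tensor.unary` (or `binary`) whose
branches may yield "no value" feeding a custom `sparse_tensor.reduce`. Below is
a minimal sketch, essentially the `@red0` case from the new integration test
further down (the function name `@sketch` is illustrative; `#SV` and
`#trait_reduction` mirror the test):

```mlir
// A unary with both branches empty feeds a custom sum reduction; since the
// unary produces "no value", the whole reduction folds to the initial value 1
// passed to sparse_tensor.reduce.
#SV = #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }>

#trait_reduction = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a
    affine_map<(i) -> ()>    // x (scalar out)
  ],
  iterator_types = ["reduction"]
}

func.func @sketch(%arga: tensor<8xi32, #SV>, %argx: tensor<i32>) -> tensor<i32> {
  %c1 = arith.constant 1 : i32
  %0 = linalg.generic #trait_reduction
    ins(%arga: tensor<8xi32, #SV>)
    outs(%argx: tensor<i32>) {
    ^bb(%a: i32, %b: i32):
      %u = sparse_tensor.unary %a : i32 to i32
             present={ }
             absent={ }
      %r = sparse_tensor.reduce %u, %b, %c1 : i32 {
             ^bb0(%x: i32, %y: i32):
               %sum = arith.addi %x, %y : i32
               sparse_tensor.yield %sum : i32
           }
      linalg.yield %r : i32
  } -> tensor<i32>
  return %0 : tensor<i32>
}
```

Before this change, the sparsifier did not account for a custom reduction
receiving no value; with the fix, such a reduction is simply skipped and
genTensorStore also accepts an uninitialized rhs for kReduce.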
Added:
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir
Modified:
mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index e1bcccb9b8f09..9fd81b98a2dcd 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -889,6 +889,11 @@ def SparseTensor_BinaryOp : SparseTensor_Op<"binary", [Pure]>,
would be equivalent to a union operation where non-overlapping values
in the inputs are copied to the output unchanged.
+ Due to the possibility of empty regions, i.e. lack of a value for certain
+ cases, the result of this operation may only feed directly into the output
+ of the `linalg.generic` operation or into a custom reduction
+ `sparse_tensor.reduce` operation that follows in the same region.
+
Example of isEqual applied to intersecting elements only:
```mlir
@@ -992,6 +997,11 @@ def SparseTensor_UnaryOp : SparseTensor_Op<"unary", [Pure]>,
A region may also be declared empty (i.e. `absent={}`), indicating that the
region does not contribute to the output.
+ Due to the possibility of empty regions, i.e. lack of a value for certain
+ cases, the result of this operation may only feed directly into the output
+ of the `linalg.generic` operation or into a custom reduction
+ `sparse_tensor.reduce` operation that follows in the same region.
+
Example of A+1, restricted to existing elements:
```mlir
@@ -1015,28 +1025,41 @@ def SparseTensor_UnaryOp : SparseTensor_Op<"unary", [Pure]>,
Example returning +1 for existing values and -1 for missing values:
```mlir
- %result = sparse_tensor.unary %a : f64 to i32
- present={
- ^bb0(%x: f64):
- %ret = arith.constant 1 : i32
+ %C = bufferization.alloc_tensor...
+ %1 = linalg.generic #trait
+ ins(%A: tensor<?xf64, #SparseVector>)
+ outs(%C: tensor<?xi32, #SparseVector>) {
+ ^bb0(%a: f64, %c: i32) :
+ %result = sparse_tensor.unary %a : f64 to i32
+ present={
+ ^bb0(%x: f64):
+ %ret = arith.constant 1 : i32
+ sparse_tensor.yield %ret : i32
+ }
+ absent={
+ %ret = arith.constant -1 : i32
sparse_tensor.yield %ret : i32
- }
- absent={
- %ret = arith.constant -1 : i32
- sparse_tensor.yield %ret : i32
- }
+ }
+ linalg.yield %result : i32
+ } -> tensor<?xi32, #SparseVector>
```
Example showing a structural inversion (existing values become missing in
the output, while missing values are filled with 1):
```mlir
- %result = sparse_tensor.unary %a : f64 to i64
- present={}
- absent={
- %ret = arith.constant 1 : i64
- sparse_tensor.yield %ret : i64
- }
+ %C = bufferization.alloc_tensor...
+ %2 = linalg.generic #trait
+ ins(%A: tensor<?xf64, #SparseVector>)
+ outs(%C: tensor<?xi64, #SparseVector>) {
+ ^bb0(%a: f64, %c: i64) :
+ %result = sparse_tensor.unary %a : f64 to i64
+ present={}
+ absent={
+ %ret = arith.constant 1 : i64
+ sparse_tensor.yield %ret : i64
+ }
+ linalg.yield %result : i64
+ } -> tensor<?xi64, #SparseVector>
```
}];
@@ -1139,7 +1162,7 @@ def SparseTensor_SelectOp : SparseTensor_Op<"select", [Pure, SameOperandsAndResu
```mlir
%C = bufferization.alloc_tensor...
- %0 = linalg.generic #trait
+ %1 = linalg.generic #trait
ins(%A: tensor<?x?xf64, #CSR>)
outs(%C: tensor<?x?xf64, #CSR>) {
^bb0(%a: f64, %c: f64) :
@@ -1173,10 +1196,12 @@ def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator]>,
```mlir
%0 = sparse_tensor.unary %a : i64 to i64 {
- ^bb0(%arg0: i64):
- %cst = arith.constant 1 : i64
- %ret = arith.addi %arg0, %cst : i64
- sparse_tensor.yield %ret : i64
+ present={
+ ^bb0(%arg0: i64):
+ %cst = arith.constant 1 : i64
+ %ret = arith.addi %arg0, %cst : i64
+ sparse_tensor.yield %ret : i64
+ }
}
```
}];
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index 4e7e8f767b6cc..891befdcb51ea 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -1049,11 +1049,13 @@ static Value genTensorLoad(CodegenEnv &env, OpBuilder &builder, ExprId exp) {
/// Generates a store on a dense or sparse tensor.
static void genTensorStore(CodegenEnv &env, OpBuilder &builder, ExprId exp,
Value rhs) {
- // Only unary and binary are allowed to return uninitialized rhs
- // to indicate missing output.
+ // Only unary and binary are allowed to return an uninitialized rhs
+ // to indicate a missing output, or a custom reduction that received
+ // no value to accumulate.
if (!rhs) {
assert(env.exp(exp).kind == TensorExp::Kind::kUnary ||
- env.exp(exp).kind == TensorExp::Kind::kBinary);
+ env.exp(exp).kind == TensorExp::Kind::kBinary ||
+ env.exp(exp).kind == TensorExp::Kind::kReduce);
return;
}
// Test if this is a scalarized reduction.
@@ -1146,12 +1148,17 @@ static Value genExp(CodegenEnv &env, RewriterBase &rewriter, ExprId e,
Value v0 = genExp(env, rewriter, exp.children.e0, ldx);
Value v1 = genExp(env, rewriter, exp.children.e1, ldx);
- Value ee = env.merger().buildExp(rewriter, loc, e, v0, v1);
- if (ee &&
- (kind == TensorExp::Kind::kUnary || kind == TensorExp::Kind::kBinary ||
- kind == TensorExp::Kind::kBinaryBranch ||
- kind == TensorExp::Kind::kReduce || kind == TensorExp::Kind::kSelect))
- ee = relinkBranch(env, rewriter, ee.getParentBlock(), ee, ldx);
+ Value ee;
+ if (kind == TensorExp::Kind::kReduce && (!v0 || !v1)) {
+ // The custom reduction did not receive a value; generate nothing.
+ } else {
+ ee = env.merger().buildExp(rewriter, loc, e, v0, v1);
+ if (ee &&
+ (kind == TensorExp::Kind::kUnary || kind == TensorExp::Kind::kBinary ||
+ kind == TensorExp::Kind::kBinaryBranch ||
+ kind == TensorExp::Kind::kReduce || kind == TensorExp::Kind::kSelect))
+ ee = relinkBranch(env, rewriter, ee.getParentBlock(), ee, ldx);
+ }
if (kind == TensorExp::Kind::kReduce)
env.endCustomReduc(); // exit custom
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir
new file mode 100644
index 0000000000000..4072d5d8c8acb
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_sum.mlir
@@ -0,0 +1,170 @@
+// DEFINE: %{option} = enable-runtime-library=true
+// DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
+// DEFINE: mlir-cpu-runner \
+// DEFINE: -e entry -entry-point-result=void \
+// DEFINE: -shared-libs=%mlir_c_runner_utils | \
+// DEFINE: FileCheck %s
+//
+// RUN: %{command}
+//
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{option} = enable-runtime-library=false
+// RUN: %{command}
+//
+// Do the same run, but now with direct IR generation and vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
+// RUN: %{command}
+
+#SV = #sparse_tensor.encoding<{ lvlTypes = [ "compressed" ] }>
+
+#trait_reduction = {
+ indexing_maps = [
+ affine_map<(i) -> (i)>, // a
+ affine_map<(i) -> ()> // x (scalar out)
+ ],
+ iterator_types = ["reduction"],
+ doc = "x += SUM_CUSTOM_i UNARY a(i)"
+}
+
+// Test of a unary op feeding into a custom sum reduction.
+module {
+
+ // Contrived example for stress testing, where neither branch feeds
+ // a value into the subsequent custom sum reduction. The reduction
+ // should therefore fold to its initial value 1.
+ func.func @red0(%arga: tensor<8xi32, #SV>, %argx: tensor<i32>) -> tensor<i32> {
+ %c1 = arith.constant 1 : i32
+ %0 = linalg.generic #trait_reduction
+ ins(%arga: tensor<8xi32, #SV>)
+ outs(%argx: tensor<i32>) {
+ ^bb(%a: i32, %b: i32):
+ %u = sparse_tensor.unary %a : i32 to i32
+ present={ }
+ absent={ }
+ %r = sparse_tensor.reduce %u, %b, %c1 : i32 {
+ ^bb0(%x: i32, %y: i32):
+ %sum = arith.addi %x, %y : i32
+ sparse_tensor.yield %sum : i32
+ }
+ linalg.yield %r : i32
+ } -> tensor<i32>
+ return %0 : tensor<i32>
+ }
+
+ // Typical example where the present branch contributes a value
+ // to the subsequent custom sum reduction.
+ func.func @red1(%arga: tensor<8xi32, #SV>, %argx: tensor<i32>) -> tensor<i32> {
+ %c1 = arith.constant 1 : i32
+ %c2 = arith.constant 2 : i32
+ %0 = linalg.generic #trait_reduction
+ ins(%arga: tensor<8xi32, #SV>)
+ outs(%argx: tensor<i32>) {
+ ^bb(%a: i32, %b: i32):
+ %u = sparse_tensor.unary %a : i32 to i32
+ present={
+ ^bb(%p: i32):
+ sparse_tensor.yield %c2 : i32
+ }
+ absent={ }
+ %r = sparse_tensor.reduce %u, %b, %c1 : i32 {
+ ^bb0(%x: i32, %y: i32):
+ %sum = arith.addi %x, %y : i32
+ sparse_tensor.yield %sum : i32
+ }
+ linalg.yield %r : i32
+ } -> tensor<i32>
+ return %0 : tensor<i32>
+ }
+
+ // A complementary example where the absent branch contributes a value
+ // to the subsequent custom sum reduction.
+ func.func @red2(%arga: tensor<8xi32, #SV>, %argx: tensor<i32>) -> tensor<i32> {
+ %c1 = arith.constant 1 : i32
+ %c3 = arith.constant 3 : i32
+ %0 = linalg.generic #trait_reduction
+ ins(%arga: tensor<8xi32, #SV>)
+ outs(%argx: tensor<i32>) {
+ ^bb(%a: i32, %b: i32):
+ %u = sparse_tensor.unary %a : i32 to i32
+ present={ }
+ absent={
+ sparse_tensor.yield %c3 : i32
+ }
+ %r = sparse_tensor.reduce %u, %b, %c1 : i32 {
+ ^bb0(%x: i32, %y: i32):
+ %sum = arith.addi %x, %y : i32
+ sparse_tensor.yield %sum : i32
+ }
+ linalg.yield %r : i32
+ } -> tensor<i32>
+ return %0 : tensor<i32>
+ }
+
+ // An example where both the present and absent branches contribute
+ // values to the subsequent custom sum reduction.
+ func.func @red3(%arga: tensor<8xi32, #SV>, %argx: tensor<i32>) -> tensor<i32> {
+ %c1 = arith.constant 1 : i32
+ %c2 = arith.constant 2 : i32
+ %c3 = arith.constant 3 : i32
+ %0 = linalg.generic #trait_reduction
+ ins(%arga: tensor<8xi32, #SV>)
+ outs(%argx: tensor<i32>) {
+ ^bb(%a: i32, %b: i32):
+ %u = sparse_tensor.unary %a : i32 to i32
+ present={
+ ^bb(%p: i32):
+ sparse_tensor.yield %c2 : i32
+ } absent={
+ sparse_tensor.yield %c3 : i32
+ }
+ %r = sparse_tensor.reduce %u, %b, %c1 : i32 {
+ ^bb0(%x: i32, %y: i32):
+ %sum = arith.addi %x, %y : i32
+ sparse_tensor.yield %sum : i32
+ }
+ linalg.yield %r : i32
+ } -> tensor<i32>
+ return %0 : tensor<i32>
+ }
+
+ func.func @dump_i32(%arg0 : tensor<i32>) {
+ %v = tensor.extract %arg0[] : tensor<i32>
+ vector.print %v : i32
+ return
+ }
+
+ func.func @entry() {
+ %ri = arith.constant dense<0> : tensor<i32>
+
+ // Sparse vector of length 8 with 2 stored elements (and thus 6 implicit zeros).
+ %v0 = arith.constant sparse< [ [4], [6] ], [ 99, 999 ] > : tensor<8xi32>
+ %s0 = sparse_tensor.convert %v0: tensor<8xi32> to tensor<8xi32, #SV>
+
+ // Call the kernels.
+ %0 = call @red0(%s0, %ri) : (tensor<8xi32, #SV>, tensor<i32>) -> tensor<i32>
+ %1 = call @red1(%s0, %ri) : (tensor<8xi32, #SV>, tensor<i32>) -> tensor<i32>
+ %2 = call @red2(%s0, %ri) : (tensor<8xi32, #SV>, tensor<i32>) -> tensor<i32>
+ %3 = call @red3(%s0, %ri) : (tensor<8xi32, #SV>, tensor<i32>) -> tensor<i32>
+
+ // Verify results:
+ //   red0: 1 + nothing                          = 1
+ //   red1: 1 + 2 present values x 2             = 5
+ //   red2: 1 + 6 absent values x 3              = 19
+ //   red3: 1 + 2 x 2 (present) + 6 x 3 (absent) = 23
+ //
+ // CHECK: 1
+ // CHECK: 5
+ // CHECK: 19
+ // CHECK: 23
+ //
+ call @dump_i32(%0) : (tensor<i32>) -> ()
+ call @dump_i32(%1) : (tensor<i32>) -> ()
+ call @dump_i32(%2) : (tensor<i32>) -> ()
+ call @dump_i32(%3) : (tensor<i32>) -> ()
+
+ // Release the resources.
+ bufferization.dealloc_tensor %s0 : tensor<8xi32, #SV>
+
+ return
+ }
+}