[Mlir-commits] [mlir] 2a28861 - [mlir][sparse] improved testing and codegen for semi-ring operations
Aart Bik
llvmlistbot at llvm.org
Thu Jun 16 16:14:00 PDT 2022
Author: Aart Bik
Date: 2022-06-16T16:13:42-07:00
New Revision: 2a2886160d80f285d7637f91133ded7bac0f9879
URL: https://github.com/llvm/llvm-project/commit/2a2886160d80f285d7637f91133ded7bac0f9879
DIFF: https://github.com/llvm/llvm-project/commit/2a2886160d80f285d7637f91133ded7bac0f9879.diff
LOG: [mlir][sparse] improved testing and codegen for semi-ring operations
The semi-ring blocks were simply "inlined" by the sparse compiler, but
without any filtering or patching. This revision improves the analysis
(rejecting blocks that use non-invariant computations from outside
their blocks, except for linalg.index) and also improves the codegen
by properly patching up index computations (the previous version crashed).
A regression test is included. The documentation has also been updated
now that the example code works properly.
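For illustration, below is a minimal sketch of the kind of kernel the
analysis now admits (modeled on the integration test added in this patch;
#CSR and #trait are assumed to be defined as in that test): the semi-ring
regions may use linalg.index values from the enclosing linalg.generic,
which codegen relinks to the loop indices of the inlined clone, whereas
other non-invariant values from the outer block are rejected.

```mlir
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
%0 = linalg.generic #trait
  ins(%A, %B: tensor<4x4xf64, #CSR>, tensor<4x4xf64, #CSR>)
  outs(%C: tensor<4x4xf64, #CSR>) {
  ^bb0(%a: f64, %b: f64, %c: f64):
    %row = linalg.index 0 : index
    %col = linalg.index 1 : index
    %r = sparse_tensor.binary %a, %b : f64, f64 to f64
      overlap={
        ^bb0(%x: f64, %y: f64):
          // Referencing %row/%col here is admissible: linalg.index values
          // are relinked to the actual loop indices during codegen.
          %cmp = arith.cmpi uge, %col, %row : index
          %add = arith.addf %x, %y : f64
          %sub = arith.subf %x, %y : f64
          %sel = arith.select %cmp, %add, %sub : f64
          sparse_tensor.yield %sel : f64
      }
      left=identity
      right=identity
    linalg.yield %r : f64
} -> tensor<4x4xf64, #CSR>
```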
Reviewed By: bixia
Differential Revision: https://reviews.llvm.org/D128000
Added:
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir
Modified:
mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index fc09a068fe28..dddb25618f5c 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -188,6 +188,8 @@ def SparseTensor_LexInsertOp : SparseTensor_Op<"lex_insert", []>,
is solely defined by side-effects and not SSA values. The semantics
may be refined over time as our sparse abstractions evolve.
+ Example:
+
```mlir
sparse_tensor.lex_insert %tensor, %indices, %val
: tensor<1024x1024xf64, #CSR>, memref<?xindex>, f64
@@ -385,7 +387,8 @@ def SparseTensor_BinaryOp : SparseTensor_Op<"binary", [NoSideEffect]>,
would be equivalent to a union operation where non-overlapping values
in the inputs are copied to the output unchanged.
- Example of isEqual applied to intersecting elements only.
+ Example of isEqual applied to intersecting elements only:
+
```mlir
%C = bufferization.alloc_tensor...
%0 = linalg.generic #trait
@@ -405,8 +408,8 @@ def SparseTensor_BinaryOp : SparseTensor_Op<"binary", [NoSideEffect]>,
} -> tensor<?xi8, #SparseVec>
```
- Example of A+B in upper triangle, A-B in lower triangle
- (not working yet, but construct will be available soon).
+ Example of A+B in upper triangle, A-B in lower triangle:
+
```mlir
%C = bufferization.alloc_tensor...
%1 = linalg.generic #trait
@@ -438,7 +441,8 @@ def SparseTensor_BinaryOp : SparseTensor_Op<"binary", [NoSideEffect]>,
Example of set difference. Returns a copy of A where its sparse structure
is *not* overlapped by B. The element type of B can be different than A
- because we never use its values, only its sparse structure.
+ because we never use its values, only its sparse structure:
+
```mlir
%C = bufferization.alloc_tensor...
%2 = linalg.generic #trait
@@ -486,6 +490,7 @@ def SparseTensor_UnaryOp : SparseTensor_Op<"unary", [NoSideEffect]>,
region does not contribute to the output.
Example of A+1, restricted to existing elements:
+
```mlir
%C = bufferization.alloc_tensor...
%0 = linalg.generic #trait
@@ -546,6 +551,7 @@ def SparseTensor_YieldOp : SparseTensor_Op<"yield", [NoSideEffect, Terminator]>,
Yields a value from within a `binary` or `unary` block.
Example:
+
```
%0 = sparse_tensor.unary %a : i64 to i64 {
^bb0(%arg0: i64):
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index b6aa7b99e54a..d0b6758d00ab 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -881,9 +881,8 @@ static Value genAddress(CodeGen &codegen, OpBuilder &builder, Location loc,
}
/// Generates an index value.
-static Value genIndexValue(Merger &merger, CodeGen &codegen, OpBuilder &builder,
- unsigned exp, unsigned ldx) {
- unsigned idx = merger.exp(exp).index;
+static Value genIndexValue(CodeGen &codegen, OpBuilder &builder, unsigned idx,
+ unsigned ldx) {
Value ival = codegen.loops[idx];
Type itype = ival.getType();
// During vectorization, we either encounter:
@@ -913,6 +912,25 @@ static Value genIndexValue(Merger &merger, CodeGen &codegen, OpBuilder &builder,
return ival;
}
+/// Semi-ring branches are simply inlined by the sparse compiler. Prior
+/// analysis has verified that all computations are "local" to the inlined
+/// branch or otherwise invariantly defined outside the loop nest, with the
+/// exception of index computations, which need to be relinked to actual
+/// inlined cloned code.
+static Value relinkBranch(CodeGen &codegen, RewriterBase &rewriter,
+ Block *block, Value e, unsigned ldx) {
+ if (Operation *def = e.getDefiningOp()) {
+ if (auto indexOp = dyn_cast<linalg::IndexOp>(def))
+ return genIndexValue(codegen, rewriter, indexOp.dim(), ldx);
+ if (def->getBlock() == block) {
+ for (unsigned i = 0, n = def->getNumOperands(); i < n; i++)
+ def->setOperand(
+ i, relinkBranch(codegen, rewriter, block, def->getOperand(i), ldx));
+ }
+ }
+ return e;
+}
+
/// Recursively generates tensor expression.
static Value genExp(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
linalg::GenericOp op, unsigned exp, unsigned ldx) {
@@ -924,12 +942,17 @@ static Value genExp(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
if (merger.exp(exp).kind == Kind::kInvariant)
return genInvariantValue(merger, codegen, rewriter, exp);
if (merger.exp(exp).kind == Kind::kIndex)
- return genIndexValue(merger, codegen, rewriter, exp, ldx);
+ return genIndexValue(codegen, rewriter, merger.exp(exp).index, ldx);
Value v0 =
genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e0, ldx);
Value v1 =
genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e1, ldx);
- return merger.buildExp(rewriter, loc, exp, v0, v1);
+ Value ee = merger.buildExp(rewriter, loc, exp, v0, v1);
+ if (ee && (merger.exp(exp).kind == Kind::kUnary ||
+ merger.exp(exp).kind == Kind::kBinary ||
+ merger.exp(exp).kind == Kind::kBinaryBranch))
+ ee = relinkBranch(codegen, rewriter, ee.getParentBlock(), ee, ldx);
+ return ee;
}
/// Determines if affine expression is invariant.
diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
index 78771d1a9083..ed19cf7a1869 100644
--- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
@@ -798,7 +798,9 @@ unsigned Merger::buildLattices(unsigned e, unsigned i) {
}
Optional<unsigned> Merger::buildTensorExpFromLinalg(linalg::GenericOp op) {
+ // Build the linalg semantics backward from yield.
Operation *yield = op.region().front().getTerminator();
+ assert(isa<linalg::YieldOp>(yield));
return buildTensorExp(op, yield->getOperand(0));
}
@@ -832,6 +834,37 @@ Type Merger::inferType(unsigned e, Value src) {
return dtp;
}
+/// Ensures that sparse compiler can generate code for expression.
+static bool isAdmissableBranchExp(Operation *op, Block *block, Value v) {
+ // Arguments are always admissable.
+ if (auto arg = v.dyn_cast<BlockArgument>())
+ return true;
+ // Accept index anywhere.
+ Operation *def = v.getDefiningOp();
+ if (isa<linalg::IndexOp>(def))
+ return true;
+ // Operation defined outside branch.
+ if (def->getBlock() != block) {
+ return def->getBlock() != op->getBlock(); // invariant?
+ }
+ // Operation defined within branch. Anything is accepted,
+ // as long as all subexpressions are admissable.
+ for (unsigned i = 0, n = def->getNumOperands(); i < n; i++)
+ if (!isAdmissableBranchExp(op, block, def->getOperand(i)))
+ return false;
+ return true;
+}
+
+/// Ensures that sparse compiler can generate code for branch.
+static bool isAdmissableBranch(Operation *op, Region &region) {
+ if (region.empty())
+ return true;
+ // Build the semi-ring branch semantics backward from yield.
+ Operation *yield = region.front().getTerminator();
+ assert(isa<YieldOp>(yield));
+ return isAdmissableBranchExp(op, &region.front(), yield->getOperand(0));
+}
+
Optional<unsigned> Merger::buildTensorExp(linalg::GenericOp op, Value v) {
if (auto arg = v.dyn_cast<BlockArgument>()) {
unsigned argN = arg.getArgNumber();
@@ -920,8 +953,11 @@ Optional<unsigned> Merger::buildTensorExp(linalg::GenericOp op, Value v) {
return addExp(kCRe, e);
if (isa<arith::BitcastOp>(def))
return addExp(kBitCast, e, v);
- if (isa<sparse_tensor::UnaryOp>(def))
- return addExp(kUnary, e, Value(), def);
+ if (auto unop = dyn_cast<sparse_tensor::UnaryOp>(def)) {
+ if (isAdmissableBranch(unop, unop.presentRegion()) &&
+ isAdmissableBranch(unop, unop.absentRegion()))
+ return addExp(kUnary, e, Value(), def);
+ }
}
}
// Construct binary operations if subexpressions can be built.
@@ -971,8 +1007,14 @@ Optional<unsigned> Merger::buildTensorExp(linalg::GenericOp op, Value v) {
return addExp(kShrU, e0, e1);
if (isa<arith::ShLIOp>(def) && isInvariant(e1))
return addExp(kShlI, e0, e1);
- if (isa<sparse_tensor::BinaryOp>(def))
- return addExp(kBinary, e0, e1, Value(), def);
+ if (auto binop = dyn_cast<sparse_tensor::BinaryOp>(def)) {
+ if (isAdmissableBranch(binop, binop.overlapRegion()) &&
+ (binop.left_identity() ||
+ isAdmissableBranch(binop, binop.leftRegion())) &&
+ (binop.right_identity() ||
+ isAdmissableBranch(binop, binop.rightRegion())))
+ return addExp(kBinary, e0, e1, Value(), def);
+ }
}
}
// Cannot build.
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir
new file mode 100644
index 000000000000..f07f4e990346
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir
@@ -0,0 +1,95 @@
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: mlir-cpu-runner \
+// RUN: -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+#SparseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
+
+#trait_op = {
+ indexing_maps = [
+ affine_map<(i,j) -> (i,j)>, // A
+ affine_map<(i,j) -> (i,j)>, // B
+ affine_map<(i,j) -> (i,j)> // X (out)
+ ],
+ iterator_types = ["parallel","parallel"],
+ doc = "X(i,j) = A(i,j) OP B(i,j)"
+}
+
+module {
+ // Performs triangular add/sub operation (using semi-ring binary op).
+ func.func @triangular(%A: tensor<4x4xf64, #SparseMatrix>,
+ %B: tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix> {
+ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #SparseMatrix>
+ %0 = linalg.generic #trait_op
+ ins(%A, %B: tensor<4x4xf64, #SparseMatrix>,
+ tensor<4x4xf64, #SparseMatrix>)
+ outs(%C: tensor<4x4xf64, #SparseMatrix>) {
+ ^bb0(%a: f64, %b: f64, %c: f64) :
+ %row = linalg.index 0 : index
+ %col = linalg.index 1 : index
+ %result = sparse_tensor.binary %a, %b : f64, f64 to f64
+ overlap={
+ ^bb0(%x: f64, %y: f64):
+ %cmp = arith.cmpi "uge", %col, %row : index
+ %upperTriangleResult = arith.addf %x, %y : f64
+ %lowerTriangleResult = arith.subf %x, %y : f64
+ %ret = arith.select %cmp, %upperTriangleResult, %lowerTriangleResult : f64
+ sparse_tensor.yield %ret : f64
+ }
+ left=identity
+ right={
+ ^bb0(%y: f64):
+ %cmp = arith.cmpi "uge", %col, %row : index
+ %lowerTriangleResult = arith.negf %y : f64
+ %ret = arith.select %cmp, %y, %lowerTriangleResult : f64
+ sparse_tensor.yield %ret : f64
+ }
+ linalg.yield %result : f64
+ } -> tensor<4x4xf64, #SparseMatrix>
+ return %0 : tensor<4x4xf64, #SparseMatrix>
+ }
+
+ // Driver method to call and verify triangular kernel.
+ func.func @entry() {
+ %c0 = arith.constant 0 : index
+ %du = arith.constant -1.0 : f64
+
+ %am = arith.constant dense<
+ [ [ 1.0, 0.0, 3.0, 0.0],
+ [ 0.0, 2.0, 0.0, 0.0],
+ [ 0.0, 0.0, 0.0, 4.0],
+ [ 3.0, 4.0, 0.0, 0.0] ]> : tensor<4x4xf64>
+ %bm = arith.constant dense<
+ [ [ 1.0, 0.0, 1.0, 1.0],
+ [ 0.0, 0.5, 0.0, 0.0],
+ [ 1.0, 5.0, 2.0, 0.0],
+ [ 2.0, 0.0, 0.0, 0.0] ]> : tensor<4x4xf64>
+
+ %a = sparse_tensor.convert %am : tensor<4x4xf64> to tensor<4x4xf64, #SparseMatrix>
+ %b = sparse_tensor.convert %bm : tensor<4x4xf64> to tensor<4x4xf64, #SparseMatrix>
+ %0 = call @triangular(%a, %b) : (tensor<4x4xf64, #SparseMatrix>,
+ tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix>
+
+ //
+ // Verify the results.
+ //
+ // CHECK: ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( -1, -5, 2, 4 ), ( 1, 4, 0, 0 ) )
+ // CHECK-NEXT: ( 2, 4, 1, 2.5, -1, -5, 2, 4, 1, 4, -1, -1, -1, -1, -1, -1 )
+ //
+ %c = sparse_tensor.convert %0 : tensor<4x4xf64, #SparseMatrix> to tensor<4x4xf64>
+ %m = bufferization.to_memref %c : memref<4x4xf64>
+ %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x4xf64>, vector<4x4xf64>
+ vector.print %v : vector<4x4xf64>
+ %1 = sparse_tensor.values %0 : tensor<4x4xf64, #SparseMatrix> to memref<?xf64>
+ %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<16xf64>
+ vector.print %2 : vector<16xf64>
+
+ // Release the resources.
+ memref.dealloc %m : memref<4x4xf64>
+ sparse_tensor.release %a : tensor<4x4xf64, #SparseMatrix>
+ sparse_tensor.release %b : tensor<4x4xf64, #SparseMatrix>
+ sparse_tensor.release %0 : tensor<4x4xf64, #SparseMatrix>
+ return
+ }
+}