[Mlir-commits] [mlir] faf7cd9 - [mlir][sparse] merger extension to support sparsifying arith::CmpI/CmpF operation
Peiming Liu
llvmlistbot at llvm.org
Thu Jun 15 10:26:55 PDT 2023
Author: Peiming Liu
Date: 2023-06-15T17:26:50Z
New Revision: faf7cd97d07c6c2242a96f5daf94fb05cda0c115
URL: https://github.com/llvm/llvm-project/commit/faf7cd97d07c6c2242a96f5daf94fb05cda0c115
DIFF: https://github.com/llvm/llvm-project/commit/faf7cd97d07c6c2242a96f5daf94fb05cda0c115.diff
LOG: [mlir][sparse] merger extension to support sparsifying arith::CmpI/CmpF operation
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D152761
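In short: the merger now models arith::CmpI/arith::CmpF as binary tensor expressions
(kCmpI/kCmpF) that carry their predicate attribute, and introduces a synthetic zero
leaf (kSynZero) so a comparison can still be evaluated where one operand stores no
element. A minimal kernel this enables, adapted from the cmp_ds test added below
(#trait2 and #Tds are the trait/encoding declared in sparse_2d.mlir):

  func.func @cmp_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>,
                    %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
    %0 = linalg.generic #trait2
      ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>)
      outs(%argx: tensor<32x16xi1>) {
      ^bb(%a: f32, %b: f32, %x: i1):
        // The predicate (ult) is recorded on the tensor expression and replayed
        // when the sparse loops are emitted.
        %c = arith.cmpf ult, %a, %b : f32
        linalg.yield %c : i1
    } -> tensor<32x16xi1>
    return %0 : tensor<32x16xi1>
  }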
Added:
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cmp.mlir
Modified:
mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
mlir/test/Dialect/SparseTensor/sparse_2d.mlir
mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
index 42dc6d52f3c5a..e166da529c14d 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
@@ -45,7 +45,7 @@ struct TensorExp final {
// The `y`, `v`, and `op` parameters either must or must not be
// `kInvalidId`/`nullptr`, depending on the value of the `k` parameter;
// however, they have uniform C++ types regardless of the value of `k`.
- TensorExp(Kind k, unsigned x, ExprId y, Value v, Operation *op);
+ TensorExp(Kind k, unsigned x, ExprId y, Value v, Operation *op, Attribute a);
/// Tensor expression kind.
Kind kind;
@@ -71,6 +71,10 @@ struct TensorExp final {
/// kBinaryBranch, this holds the YieldOp for the left or right half
/// to be merged into a nested scf loop.
Operation *op;
+
+ /// An optional attribute that is required to determine the semantics of the
+ /// operation, e.g., the predicate attribute for CmpI/CmpF operations.
+ Attribute attr;
};
/// Tensor expression kind.
@@ -79,6 +83,10 @@ struct TensorExp final {
/// That is, its argument is a `LoopId` identifying the loop-variable
/// in question, and its value will be the current iteration's value
/// of that loop-variable. See the `LoopId` documentation for more details.
+///
+/// The `kSynZero` leaf kind is for representing a synthetic zero value, which
+/// can be introduced when sparsifying operations like `arith::cmp` to generate
+/// `arith::cmp %lhs, %syn_zero` when the rhs operand is absent.
//
// TODO: Modify this definition so that the numeric values already encode
// the `ExpArity` (while extending the notion of "arity" to include not
@@ -89,6 +97,7 @@ struct TensorExp final {
enum class TensorExp::Kind {
// Leaf.
kTensor = 0,
+ kSynZero,
kInvariant,
kLoopVar,
// Unary operations.
@@ -143,6 +152,8 @@ enum class TensorExp::Kind {
kAndI,
kOrI,
kXorI,
+ kCmpI,
+ kCmpF,
kShrS, // signed
kShrU, // unsigned
kShlI,
@@ -246,13 +257,16 @@ class Merger {
ExprId addLoopVarExp(LoopId i);
/// Constructs a new invariant expression, and returns its identifier.
ExprId addInvariantExp(Value v);
+ /// Constructs a new synthetic zero expression, and returns its identifier.
+ ExprId addSynZeroExp();
/// Constructs a new unary or binary expression, and returns its identifier.
ExprId addExp(TensorExp::Kind k, ExprId e0, ExprId e1 = detail::kInvalidId,
- Operation *op = nullptr);
+ Operation *op = nullptr, Attribute attr = nullptr);
/// Constructs a new sesquinary expression, and returns its identifier.
/// Currently no sesquinary `Kind` allows specifying the `op`, but we
/// allow it anyways because `mapSet` is designed to allow it.
- ExprId addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op = nullptr);
+ ExprId addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op = nullptr,
+ Attribute attr = nullptr);
/// Constructs a new iteration lattice point, and returns its identifier.
LatPointId addLat(TensorId t, LoopId i, ExprId e);
@@ -265,26 +279,29 @@ class Merger {
/// of `LoopId` (effectively constructing a larger "intersection" of those
/// loops) with a newly constructed tensor (sub)expression of given kind.
/// Returns the identifier of the new lattice point.
- LatPointId conjLat(TensorExp::Kind kind, LatPointId p0, LatPointId p1,
+ LatPointId conjLat(ExprId e, LatPointId p0, LatPointId p1,
Operation *op = nullptr);
/// Conjunctive merge of two lattice sets: `(s0 /\_op s1)`.
/// Returns the identifier of the new set.
- LatSetId conjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
- Operation *op = nullptr);
+ LatSetId conjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op = nullptr);
/// Disjunctive merge of two lattice sets: `(s0 /\_op s1, s0, s1)`.
/// Returns the identifier of the new set.
- LatSetId disjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
- Operation *op = nullptr);
+ LatSetId disjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op = nullptr);
+
+ /// Disjunctive merge of two lattice sets where the missing operand of each
+ /// one-sided point is set to zero:
+ /// `(s0 /\_op s1 (e0 op e1), s0 (e0 op 0), s1 (0 op e1))`.
+ /// Returns the identifier of the new set.
+ LatSetId disjSetWithZero(ExprId e, LatSetId s0, LatSetId s1);
/// Disjunctive merge of two lattice sets with custom handling of the
/// overlap, left, and right regions. Any region may be left missing
/// in the output. Returns the identifier of the new set.
- LatSetId combiSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
- Operation *orig, bool includeLeft, TensorExp::Kind ltrans,
- Operation *opleft, bool includeRight,
- TensorExp::Kind rtrans, Operation *opright);
+ LatSetId combiSet(ExprId e, LatSetId s0, LatSetId s1, Operation *orig,
+ bool includeLeft, TensorExp::Kind ltrans, Operation *opleft,
+ bool includeRight, TensorExp::Kind rtrans,
+ Operation *opright);
/// Maps the unary operator over the lattice set of the operand, i.e. each
/// lattice point on an expression E is simply copied over, but with OP E
@@ -292,6 +309,12 @@ class Merger {
LatSetId mapSet(TensorExp::Kind kind, LatSetId s, Value v = Value(),
Operation *op = nullptr);
+ /// Maps the binary operator to the same operation but with one of its
+ /// operands set to zero, i.e. each lattice point on an expression E is simply
+ /// copied over, but with `OP 0 E` (if lhsZero == true) or `OP E 0` (if
+ /// lhsZero == false) as the new expression. Returns the identifier of the new set.
+ LatSetId mapBinWithSynZeroSet(ExprId e, LatSetId s, bool lhsZero);
+
/// Optimizes the iteration lattice points in the given set. This
/// method should be called right before code generation to avoid
/// generating redundant loops and conditions.
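A sketch of what the new disjSetWithZero produces for a comparison x < y with both
operands sparse (the cmp_ss_ss test below exercises exactly this): the overlap region
compares the two stored values, while each one-sided region compares the stored value
against a synthetic zero of the same element type. In scalar form (names illustrative):

  %zero = arith.constant 0.000000e+00 : f32
  // Both operands present at coordinate (i, j).
  %both = arith.cmpf ult, %a, %b : f32
  // Only the lhs is present: the rhs is replaced by the synthetic zero.
  %lhs_only = arith.cmpf ult, %a, %zero : f32
  // Only the rhs is present: the lhs is replaced by the synthetic zero.
  %rhs_only = arith.cmpf ult, %zero, %b : f32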
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index 7e69a737b0661..428bc49d14ac9 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -1154,11 +1154,11 @@ static Value relinkBranch(CodegenEnv &env, RewriterBase &rewriter, Block *block,
/// Recursively generates tensor expression.
static Value genExp(CodegenEnv &env, RewriterBase &rewriter, ExprId e,
LoopId ldx) {
- linalg::GenericOp op = env.op();
- Location loc = op.getLoc();
-
if (e == ::mlir::sparse_tensor::detail::kInvalidId)
return Value();
+
+ linalg::GenericOp op = env.op();
+ Location loc = op.getLoc();
const TensorExp &exp = env.exp(e);
const auto kind = exp.kind;
if (kind == TensorExp::Kind::kTensor)
@@ -1171,8 +1171,22 @@ static Value genExp(CodegenEnv &env, RewriterBase &rewriter, ExprId e,
if (kind == TensorExp::Kind::kReduce)
env.startCustomReduc(e); // enter custom
- Value v0 = genExp(env, rewriter, exp.children.e0, ldx);
- Value v1 = genExp(env, rewriter, exp.children.e1, ldx);
+ Value v0, v1;
+ // If either the lhs or the rhs is a synthetic zero, we infer the type of the
+ // zero value from the other operand.
+ if (exp.children.e0 != ::mlir::sparse_tensor::detail::kInvalidId &&
+ env.exp(exp.children.e0).kind == TensorExp::Kind::kSynZero) {
+ v1 = genExp(env, rewriter, exp.children.e1, ldx);
+ v0 = constantZero(rewriter, loc, v1.getType());
+ } else if (exp.children.e1 != ::mlir::sparse_tensor::detail::kInvalidId &&
+ env.exp(exp.children.e1).kind == TensorExp::Kind::kSynZero) {
+ v0 = genExp(env, rewriter, exp.children.e0, ldx);
+ v1 = constantZero(rewriter, loc, v0.getType());
+ } else {
+ v0 = genExp(env, rewriter, exp.children.e0, ldx);
+ v1 = genExp(env, rewriter, exp.children.e1, ldx);
+ }
+
Value ee;
if (kind == TensorExp::Kind::kReduce && (!v0 || !v1)) {
// custom reduce did not receive a value
@@ -1248,7 +1262,8 @@ static void genInvariants(CodegenEnv &env, OpBuilder &builder, ExprId exp,
env.merger().clearExprValue(exp);
}
} else if (env.exp(exp).kind != TensorExp::Kind::kInvariant &&
- env.exp(exp).kind != TensorExp::Kind::kLoopVar) {
+ env.exp(exp).kind != TensorExp::Kind::kLoopVar &&
+ env.exp(exp).kind != TensorExp::Kind::kSynZero) {
// Traverse into the binary operations. Note that we only hoist
// tensor loads, since subsequent MLIR/LLVM passes know how to
// deal with all other kinds of derived loop invariants.
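Note on the genExp change above: the synthetic zero leaf has no type of its own, so
the non-zero child is generated first and constantZero then materializes a matching
constant. For an f32 operand this yields (illustrative values, mirroring the CHECK
lines in the updated sparse_2d.mlir tests):

  %b    = memref.load %argb[%i, %j] : memref<32x16xf32>
  %zero = arith.constant 0.000000e+00 : f32   // type taken from %b
  %c    = arith.cmpf ult, %zero, %b : f32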
diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
index 6ec5d42a78c36..7a39aa48d8706 100644
--- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
@@ -31,6 +31,7 @@ static ExpArity getExpArity(TensorExp::Kind k) {
case TensorExp::Kind::kTensor:
case TensorExp::Kind::kInvariant:
case TensorExp::Kind::kLoopVar:
+ case TensorExp::Kind::kSynZero:
return ExpArity::kNullary;
case TensorExp::Kind::kAbsF:
case TensorExp::Kind::kAbsC:
@@ -89,6 +90,8 @@ static ExpArity getExpArity(TensorExp::Kind k) {
case TensorExp::Kind::kSubF:
case TensorExp::Kind::kSubC:
case TensorExp::Kind::kSubI:
+ case TensorExp::Kind::kCmpF:
+ case TensorExp::Kind::kCmpI:
return ExpArity::kBinary;
}
llvm_unreachable("unexpected kind");
@@ -99,7 +102,7 @@ static ExpArity getExpArity(TensorExp::Kind k) {
//===----------------------------------------------------------------------===//
TensorExp::TensorExp(TensorExp::Kind k, unsigned x, ExprId y, Value v,
- Operation *o)
+ Operation *o, Attribute a)
: kind(k), val(v), op(o) {
switch (kind) {
// Leaf.
@@ -107,6 +110,9 @@ TensorExp::TensorExp(TensorExp::Kind k, unsigned x, ExprId y, Value v,
assert(x != detail::kInvalidId && y == detail::kInvalidId && !v && !o);
tensor = x;
return;
+ case TensorExp::Kind::kSynZero:
+ assert(x == detail::kInvalidId && y == detail::kInvalidId && !v && !o);
+ return;
case TensorExp::Kind::kInvariant:
assert(x == detail::kInvalidId && y == detail::kInvalidId && v && !o);
return;
@@ -191,6 +197,13 @@ TensorExp::TensorExp(TensorExp::Kind k, unsigned x, ExprId y, Value v,
children.e0 = x;
children.e1 = y;
return;
+ case TensorExp::Kind::kCmpF:
+ case TensorExp::Kind::kCmpI:
+ assert(x != detail::kInvalidId && y != detail::kInvalidId && !v && !o);
+ attr = a;
+ children.e0 = x;
+ children.e1 = y;
+ return;
case TensorExp::Kind::kBinary:
case TensorExp::Kind::kReduce:
assert(x != detail::kInvalidId && y != detail::kInvalidId && !v && o);
@@ -228,7 +241,7 @@ ExprId Merger::addTensorExp(TensorId t) {
assert(isValidTensorId(t));
const ExprId eNew(tensorExps.size());
tensorExps.emplace_back(TensorExp::Kind::kTensor, t, detail::kInvalidId,
- Value(), nullptr);
+ Value(), nullptr, nullptr);
return eNew;
}
@@ -236,28 +249,37 @@ ExprId Merger::addLoopVarExp(LoopId i) {
assert(isValidLoopId(i));
const ExprId eNew(tensorExps.size());
tensorExps.emplace_back(TensorExp::Kind::kLoopVar, i, detail::kInvalidId,
- Value(), nullptr);
+ Value(), nullptr, nullptr);
return eNew;
}
ExprId Merger::addInvariantExp(Value v) {
const ExprId eNew(tensorExps.size());
tensorExps.emplace_back(TensorExp::Kind::kInvariant, detail::kInvalidId,
- detail::kInvalidId, v, nullptr);
+ detail::kInvalidId, v, nullptr, nullptr);
return eNew;
}
-ExprId Merger::addExp(TensorExp::Kind k, ExprId e0, ExprId e1, Operation *op) {
+ExprId Merger::addSynZeroExp() {
+ const ExprId eNew(tensorExps.size());
+ tensorExps.emplace_back(TensorExp::Kind::kSynZero, detail::kInvalidId,
+ detail::kInvalidId, Value(), nullptr, nullptr);
+ return eNew;
+}
+
+ExprId Merger::addExp(TensorExp::Kind k, ExprId e0, ExprId e1, Operation *op,
+ Attribute attr) {
assert(k > TensorExp::Kind::kLoopVar);
const ExprId eNew(tensorExps.size());
- tensorExps.emplace_back(k, e0, e1, Value(), op);
+ tensorExps.emplace_back(k, e0, e1, Value(), op, attr);
return eNew;
}
-ExprId Merger::addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op) {
+ExprId Merger::addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op,
+ Attribute attr) {
assert(k > TensorExp::Kind::kLoopVar);
const ExprId eNew(tensorExps.size());
- tensorExps.emplace_back(k, e, detail::kInvalidId, v, op);
+ tensorExps.emplace_back(k, e, detail::kInvalidId, v, op, attr);
return eNew;
}
@@ -283,31 +305,33 @@ LatSetId Merger::addSet() {
return sNew;
}
-LatPointId Merger::conjLat(TensorExp::Kind kind, LatPointId p0, LatPointId p1,
+LatPointId Merger::conjLat(ExprId e, LatPointId p0, LatPointId p1,
Operation *op) {
+ TensorExp::Kind kind = exp(e).kind;
+ Attribute attr = exp(e).attr;
const LatPointId pNew(latPoints.size());
const auto &point0 = lat(p0);
const auto &point1 = lat(p1);
BitVector bits(point0.bits);
bits |= point1.bits;
- const ExprId e = addExp(kind, point0.exp, point1.exp, op);
- latPoints.emplace_back(bits, e);
+ const ExprId ne = addExp(kind, point0.exp, point1.exp, op, attr);
+ latPoints.emplace_back(bits, ne);
return pNew;
}
-LatSetId Merger::conjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
- Operation *op) {
+LatSetId Merger::conjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op) {
const LatSetId sNew = addSet();
auto &setNew = latSets[sNew];
for (const LatPointId p0 : set(s0))
for (const LatPointId p1 : set(s1))
- setNew.push_back(conjLat(kind, p0, p1, op));
+ setNew.push_back(conjLat(e, p0, p1, op));
return sNew;
}
-LatSetId Merger::disjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
- Operation *op) {
- const LatSetId sNew = conjSet(kind, s0, s1, op);
+LatSetId Merger::disjSet(ExprId e, LatSetId s0, LatSetId s1, Operation *op) {
+ const LatSetId sNew = conjSet(e, s0, s1, op);
+ TensorExp::Kind kind = exp(e).kind;
+
// Followed by all in s0.
latSets[sNew].append(latSets[s0]);
// Map binary 0-y to unary -y.
@@ -323,12 +347,35 @@ LatSetId Merger::disjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
return sNew;
}
-LatSetId Merger::combiSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1,
- Operation *orig, bool includeLeft,
- TensorExp::Kind ltrans, Operation *opleft,
- bool includeRight, TensorExp::Kind rtrans,
- Operation *opright) {
- const LatSetId sNew = conjSet(kind, s0, s1, orig);
+LatSetId Merger::disjSetWithZero(ExprId e, LatSetId s0, LatSetId s1) {
+ assert(exp(e).kind == TensorExp::Kind::kCmpI ||
+ exp(e).kind == TensorExp::Kind::kCmpF);
+ const LatSetId sNew = conjSet(e, s0, s1, nullptr);
+
+ ExprId e0 = exp(e).children.e0;
+ ExprId e1 = exp(e).children.e1;
+ if (exp(e0).kind == TensorExp::Kind::kSynZero ||
+ exp(e1).kind == TensorExp::Kind::kSynZero) {
+ // lhs and rhs can't be synthetic zero at the same time.
+ assert(exp(e0).kind != exp(e1).kind);
+ // If one of the operands has already been set to the synthetic zero (the
+ // element is absent in the corresponding operand), then we do not
+ // need to build a disjunctive set for it.
+ return sNew;
+ }
+
+ auto lhsSet = mapBinWithSynZeroSet(e, s0, false);
+ auto rhsSet = mapBinWithSynZeroSet(e, s1, true);
+ latSets[sNew].append(latSets[lhsSet]);
+ latSets[sNew].append(latSets[rhsSet]);
+ return sNew;
+}
+
+LatSetId Merger::combiSet(ExprId e, LatSetId s0, LatSetId s1, Operation *orig,
+ bool includeLeft, TensorExp::Kind ltrans,
+ Operation *opleft, bool includeRight,
+ TensorExp::Kind rtrans, Operation *opright) {
+ const LatSetId sNew = conjSet(e, s0, s1, orig);
// Left Region.
if (includeLeft) {
if (opleft)
@@ -356,6 +403,23 @@ LatSetId Merger::mapSet(TensorExp::Kind kind, LatSetId s0, Value v,
return sNew;
}
+LatSetId Merger::mapBinWithSynZeroSet(ExprId e, LatSetId s0, bool lhsZero) {
+ TensorExp::Kind kind = exp(e).kind;
+ Attribute a = exp(e).attr;
+ // Must be a binary operation.
+ assert(TensorExp::Kind::kMulF <= kind && kind <= TensorExp::Kind::kShlI);
+ const LatSetId sNew = addSet();
+ auto &setNew = latSets[sNew];
+ const ExprId zeroExp = addSynZeroExp();
+ for (const LatPointId p : set(s0)) {
+ const auto &point = latPoints[p];
+ ExprId newExp = lhsZero ? addExp(kind, zeroExp, point.exp, nullptr, a)
+ : addExp(kind, point.exp, zeroExp, nullptr, a);
+ setNew.push_back(addLat(point.bits, newExp));
+ }
+ return sNew;
+}
+
LatSetId Merger::optimizeSet(LatSetId s0) {
const LatSetId sNew = addSet();
auto &setNew = latSets[sNew];
@@ -418,7 +482,8 @@ BitVector Merger::simplifyCond(LatSetId s0, LatPointId p0) {
// Slice on dense level has `locate` property as well, and can be optimized.
if (simple[b] && !isSparseLvlWithNonTrivialIdxExp(b)) {
const auto dlt = getLvlType(b);
- if (!isCompressedDLT(dlt) && !isSingletonDLT(dlt) && !isCompressedWithHiDLT(dlt)) {
+ if (!isCompressedDLT(dlt) && !isSingletonDLT(dlt) &&
+ !isCompressedWithHiDLT(dlt)) {
if (reset)
simple.reset(b);
reset = true;
@@ -505,6 +570,7 @@ bool Merger::isSingleCondition(TensorId t, ExprId e) const {
return expr.tensor == t;
case TensorExp::Kind::kInvariant:
case TensorExp::Kind::kLoopVar:
+ case TensorExp::Kind::kSynZero:
return false;
// Unary operations.
case TensorExp::Kind::kAbsF:
@@ -576,6 +642,8 @@ bool Merger::isSingleCondition(TensorId t, ExprId e) const {
case TensorExp::Kind::kSubI:
case TensorExp::Kind::kOrI:
case TensorExp::Kind::kXorI:
+ case TensorExp::Kind::kCmpF:
+ case TensorExp::Kind::kCmpI:
case TensorExp::Kind::kBinary:
return false;
}
@@ -585,7 +653,8 @@ bool Merger::isSingleCondition(TensorId t, ExprId e) const {
bool Merger::hasAnySparse(const BitVector &bits) const {
for (TensorLoopId b : bits.set_bits()) {
const auto dlt = getLvlType(b);
- if (isCompressedDLT(dlt) || isSingletonDLT(dlt) || isCompressedWithHiDLT(dlt))
+ if (isCompressedDLT(dlt) || isSingletonDLT(dlt) ||
+ isCompressedWithHiDLT(dlt))
return true;
}
return hasSparseIdxReduction(bits);
@@ -613,6 +682,8 @@ static const char *kindToOpSymbol(TensorExp::Kind kind) {
return "invariant";
case TensorExp::Kind::kLoopVar:
return "index";
+ case TensorExp::Kind::kSynZero:
+ return "0";
// Unary operations.
case TensorExp::Kind::kAbsF:
case TensorExp::Kind::kAbsC:
@@ -693,6 +764,9 @@ static const char *kindToOpSymbol(TensorExp::Kind kind) {
return ">>";
case TensorExp::Kind::kShlI:
return "<<";
+ case TensorExp::Kind::kCmpF:
+ case TensorExp::Kind::kCmpI:
+ return "cmp";
case TensorExp::Kind::kBinary:
return "binary";
case TensorExp::Kind::kReduce:
@@ -715,6 +789,9 @@ void Merger::dumpExp(ExprId e) const {
case TensorExp::Kind::kInvariant:
llvm::dbgs() << "invariant";
break;
+ case TensorExp::Kind::kSynZero:
+ llvm::dbgs() << "0";
+ break;
case TensorExp::Kind::kLoopVar:
llvm::dbgs() << "loopvar_" << expr.loop;
break;
@@ -776,11 +853,16 @@ void Merger::dumpExp(ExprId e) const {
case TensorExp::Kind::kShrS:
case TensorExp::Kind::kShrU:
case TensorExp::Kind::kShlI:
+ case TensorExp::Kind::kCmpF:
+ case TensorExp::Kind::kCmpI:
case TensorExp::Kind::kBinary:
case TensorExp::Kind::kReduce:
llvm::dbgs() << "(";
dumpExp(expr.children.e0);
- llvm::dbgs() << " " << kindToOpSymbol(expr.kind) << " ";
+ llvm::dbgs() << " " << kindToOpSymbol(expr.kind);
+ if (expr.attr)
+ llvm::dbgs() << "{" << expr.attr << "}";
+ llvm::dbgs() << " ";
dumpExp(expr.children.e1);
llvm::dbgs() << ")";
break;
@@ -839,6 +921,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
// Leaf.
case TensorExp::Kind::kTensor:
case TensorExp::Kind::kInvariant:
+ case TensorExp::Kind::kSynZero:
case TensorExp::Kind::kLoopVar: {
// Either the loop-var is really used in the tensor expression, or it is
// set to the undefined loop-var in that level. An invariant expression,
@@ -928,7 +1011,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
YieldOp absentYield = cast<YieldOp>(absentBlock.getTerminator());
const Value absentVal = absentYield.getResult();
const ExprId rhs = addInvariantExp(absentVal);
- return disjSet(kind, child0, buildLattices(rhs, i), unop);
+ return disjSet(e, child0, buildLattices(rhs, i), unop);
}
// Binary operations.
case TensorExp::Kind::kMulF:
@@ -947,7 +1030,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
{
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
- return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i));
+ return conjSet(e, buildLattices(e0, i), buildLattices(e1, i));
}
case TensorExp::Kind::kDivF:
case TensorExp::Kind::kDivC:
@@ -970,7 +1053,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
assert(!maybeZero(e1));
- return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i));
+ return conjSet(e, buildLattices(e0, i), buildLattices(e1, i));
}
case TensorExp::Kind::kAddF:
case TensorExp::Kind::kAddC:
@@ -990,7 +1073,21 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
{
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
- return disjSet(kind, buildLattices(e0, i), buildLattices(e1, i));
+ return disjSet(e, buildLattices(e0, i), buildLattices(e1, i));
+ }
+ case TensorExp::Kind::kCmpF:
+ case TensorExp::Kind::kCmpI:
+ // A comparison operation needs to be performed
+ // for the disjunction of sparse iteration spaces.
+ //
+ //  x < y |  !y   |   y   |
+ // -------+-------+-------+
+ //   !x   |   0   | 0 < y |
+ //    x   | x < 0 | x < y |
+ {
+ const ExprId e0 = expr.children.e0;
+ const ExprId e1 = expr.children.e1;
+ return disjSetWithZero(e, buildLattices(e0, i), buildLattices(e1, i));
}
case TensorExp::Kind::kShrS:
case TensorExp::Kind::kShrU:
@@ -1002,7 +1099,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
assert(isInvariant(e1));
- return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i));
+ return conjSet(e, buildLattices(e0, i), buildLattices(e1, i));
}
case TensorExp::Kind::kBinary:
// A custom binary operation.
@@ -1033,9 +1130,9 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
}
bool includeLeft = binop.getLeftIdentity() || !leftRegion.empty();
bool includeRight = binop.getRightIdentity() || !rightRegion.empty();
- return combiSet(TensorExp::Kind::kBinary, child0, child1, binop,
- includeLeft, TensorExp::Kind::kBinaryBranch, leftYield,
- includeRight, TensorExp::Kind::kBinaryBranch, rightYield);
+ return combiSet(e, child0, child1, binop, includeLeft,
+ TensorExp::Kind::kBinaryBranch, leftYield, includeRight,
+ TensorExp::Kind::kBinaryBranch, rightYield);
}
case TensorExp::Kind::kReduce:
// A custom reduce operation.
@@ -1043,7 +1140,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) {
const ExprId e0 = expr.children.e0;
const ExprId e1 = expr.children.e1;
Operation *const op = expr.op;
- return conjSet(kind, buildLattices(e0, i), buildLattices(e1, i), op);
+ return conjSet(e, buildLattices(e0, i), buildLattices(e1, i), op);
}
}
llvm_unreachable("unexpected expression kind");
@@ -1261,6 +1358,37 @@ std::optional<ExprId> Merger::buildTensorExp(linalg::GenericOp op, Value v) {
return addExp(TensorExp::Kind::kShrU, e0, e1);
if (isa<arith::ShLIOp>(def) && isInvariant(e1))
return addExp(TensorExp::Kind::kShlI, e0, e1);
+ if (auto ci = dyn_cast<arith::CmpIOp>(def)) {
+ if (ci.getPredicate() == arith::CmpIPredicate::eq ||
+ ci.getPredicate() == arith::CmpIPredicate::sle ||
+ ci.getPredicate() == arith::CmpIPredicate::sge ||
+ ci.getPredicate() == arith::CmpIPredicate::ule ||
+ ci.getPredicate() == arith::CmpIPredicate::uge) {
+ // We cannot sparsify comparisons that include equality, because
+ // 0 <= 0 yields true and thus densifies the result.
+ return std::nullopt;
+ }
+
+ return addExp(TensorExp::Kind::kCmpI, e0, e1, nullptr,
+ ci.getPredicateAttr());
+ }
+ if (auto cf = dyn_cast<arith::CmpFOp>(def)) {
+ if (cf.getPredicate() == arith::CmpFPredicate::OEQ ||
+ cf.getPredicate() == arith::CmpFPredicate::OGE ||
+ cf.getPredicate() == arith::CmpFPredicate::OLE ||
+ cf.getPredicate() == arith::CmpFPredicate::ONE ||
+ cf.getPredicate() == arith::CmpFPredicate::UEQ ||
+ cf.getPredicate() == arith::CmpFPredicate::UGE ||
+ cf.getPredicate() == arith::CmpFPredicate::ULE ||
+ cf.getPredicate() == arith::CmpFPredicate::ORD ||
+ cf.getPredicate() == arith::CmpFPredicate::UNO) {
+ // We cannot sparsify comparisons that include equality, because
+ // 0 <= 0 yields true and thus densifies the result.
+ return std::nullopt;
+ }
+ return addExp(TensorExp::Kind::kCmpF, e0, e1, nullptr,
+ cf.getPredicateAttr());
+ }
if (auto binop = dyn_cast<sparse_tensor::BinaryOp>(def)) {
if (isAdmissibleBranch(binop, binop.getOverlapRegion()) &&
(binop.getLeftIdentity() ||
@@ -1342,6 +1470,7 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, ExprId e, Value v0,
case TensorExp::Kind::kTensor:
case TensorExp::Kind::kInvariant:
case TensorExp::Kind::kLoopVar:
+ case TensorExp::Kind::kSynZero:
llvm_unreachable("unexpected non-op");
// Unary operations.
case TensorExp::Kind::kAbsF:
@@ -1458,6 +1587,14 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, ExprId e, Value v0,
return rewriter.create<arith::ShRUIOp>(loc, v0, v1);
case TensorExp::Kind::kShlI:
return rewriter.create<arith::ShLIOp>(loc, v0, v1);
+ case TensorExp::Kind::kCmpI: {
+ auto predicate = llvm::cast<arith::CmpIPredicateAttr>(expr.attr);
+ return rewriter.create<arith::CmpIOp>(loc, predicate, v0, v1);
+ }
+ case TensorExp::Kind::kCmpF: {
+ auto predicate = llvm::cast<arith::CmpFPredicateAttr>(expr.attr);
+ return rewriter.create<arith::CmpFOp>(loc, predicate, v0, v1);
+ }
case TensorExp::Kind::kBinaryBranch: // semi-ring ops with custom logic.
return insertYieldOp(rewriter, loc, *expr.op->getBlock()->getParent(),
{v0});
diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
index 664121aae5a37..14187063e13c2 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
@@ -52,6 +52,43 @@ func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg
return %0 : tensor<32x16xf32>
}
+// CHECK-LABEL: func.func @cmp_dd(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "dense" ] }>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "dense" ] }>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
+// CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
+// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_10]] : memref<32x16xi1>)
+// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
+// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
+// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index
+// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[VAL_12]] : index
+// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref<?xf32>
+// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xf32>
+// CHECK: %[[VAL_17:.*]] = arith.cmpf ult, %[[VAL_15]], %[[VAL_16]] : f32
+// CHECK: memref.store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_11]], %[[VAL_12]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_18:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32x16xi1>
+// CHECK: return %[[VAL_18]] : tensor<32x16xi1>
+// CHECK: }
+func.func @cmp_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
+ %0 = linalg.generic #trait2
+ ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>)
+ outs(%argx: tensor<32x16xi1>) {
+ ^bb(%a: f32, %b: f32, %x: i1):
+ %0 = arith.cmpf ult, %a, %b : f32
+ linalg.yield %0 : i1
+ } -> tensor<32x16xi1>
+ return %0 : tensor<32x16xi1>
+}
+
// CHECK-LABEL: func @mul_dd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "dense" ] }>>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
@@ -151,6 +188,73 @@ func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg
return %0 : tensor<32x16xf32>
}
+// CHECK-LABEL: func.func @cmp_ds(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
+// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
+// CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
+// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>)
+// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
+// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_15]]] : memref<?xindex>
+// CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_7]] : index
+// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_17]]] : memref<?xindex>
+// CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_16]], %[[VAL_21:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_18]] : index
+// CHECK: scf.condition(%[[VAL_22]]) %[[VAL_20]], %[[VAL_21]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index):
+// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref<?xindex>
+// CHECK: %[[VAL_26:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index
+// CHECK: scf.if %[[VAL_26]] {
+// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_23]]] : memref<?xf32>
+// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xf32>
+// CHECK: %[[VAL_29:.*]] = arith.cmpf ult, %[[VAL_27]], %[[VAL_28]] : f32
+// CHECK: memref.store %[[VAL_29]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xi1>
+// CHECK: } else {
+// CHECK: scf.if %[[VAL_8]] {
+// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xf32>
+// CHECK: %[[VAL_31:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_30]] : f32
+// CHECK: memref.store %[[VAL_31]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_24]]] : memref<32x16xi1>
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_32:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index
+// CHECK: %[[VAL_33:.*]] = arith.addi %[[VAL_23]], %[[VAL_7]] : index
+// CHECK: %[[VAL_34:.*]] = arith.select %[[VAL_32]], %[[VAL_33]], %[[VAL_23]] : index
+// CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_24]], %[[VAL_7]] : index
+// CHECK: scf.yield %[[VAL_34]], %[[VAL_35]] : index, index
+// CHECK: } attributes
+// CHECK: scf.for %[[VAL_36:.*]] = %[[VAL_37:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] {
+// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_15]], %[[VAL_36]]] : memref<32x16xf32>
+// CHECK: %[[VAL_39:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_38]] : f32
+// CHECK: memref.store %[[VAL_39]], %[[VAL_14]]{{\[}}%[[VAL_15]], %[[VAL_36]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_40:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16xi1>
+// CHECK: return %[[VAL_40]] : tensor<32x16xi1>
+// CHECK: }
+func.func @cmp_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
+ %0 = linalg.generic #trait2
+ ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>)
+ outs(%argx: tensor<32x16xi1>) {
+ ^bb(%a: f32, %b: f32, %x: i1):
+ %0 = arith.cmpf ult, %a, %b : f32
+ linalg.yield %0 : i1
+ } -> tensor<32x16xi1>
+ return %0 : tensor<32x16xi1>
+}
+
// CHECK-LABEL: func @mul_ds(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "dense", "compressed" ] }>>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
@@ -258,6 +362,78 @@ func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg
return %0 : tensor<32x16xf32>
}
+// CHECK-LABEL: func.func @cmp_sd(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 32 : index
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
+// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>> to memref<?xf32>
+// CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
+// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
+// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>)
+// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<?xindex>
+// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_7]]] : memref<?xindex>
+// CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_15]], %[[VAL_19:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_20:.*]] = arith.cmpi ult, %[[VAL_18]], %[[VAL_16]] : index
+// CHECK: scf.condition(%[[VAL_20]]) %[[VAL_18]], %[[VAL_19]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_21:.*]]: index, %[[VAL_22:.*]]: index):
+// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_21]]] : memref<?xindex>
+// CHECK: %[[VAL_24:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_22]] : index
+// CHECK: scf.if %[[VAL_24]] {
+// CHECK: scf.for %[[VAL_25:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
+// CHECK: %[[VAL_26:.*]] = arith.muli %[[VAL_21]], %[[VAL_3]] : index
+// CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_26]], %[[VAL_25]] : index
+// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref<?xf32>
+// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_22]], %[[VAL_25]]] : memref<32x16xf32>
+// CHECK: %[[VAL_30:.*]] = arith.cmpf ult, %[[VAL_28]], %[[VAL_29]] : f32
+// CHECK: memref.store %[[VAL_30]], %[[VAL_14]]{{\[}}%[[VAL_22]], %[[VAL_25]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: } else {
+// CHECK: scf.if %[[VAL_8]] {
+// CHECK: scf.for %[[VAL_31:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
+// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_22]], %[[VAL_31]]] : memref<32x16xf32>
+// CHECK: %[[VAL_33:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_32]] : f32
+// CHECK: memref.store %[[VAL_33]], %[[VAL_14]]{{\[}}%[[VAL_22]], %[[VAL_31]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_34:.*]] = arith.cmpi eq, %[[VAL_23]], %[[VAL_22]] : index
+// CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_21]], %[[VAL_7]] : index
+// CHECK: %[[VAL_36:.*]] = arith.select %[[VAL_34]], %[[VAL_35]], %[[VAL_21]] : index
+// CHECK: %[[VAL_37:.*]] = arith.addi %[[VAL_22]], %[[VAL_7]] : index
+// CHECK: scf.yield %[[VAL_36]], %[[VAL_37]] : index, index
+// CHECK: } attributes
+// CHECK: scf.for %[[VAL_38:.*]] = %[[VAL_39:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] {
+// CHECK: scf.for %[[VAL_40:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
+// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_38]], %[[VAL_40]]] : memref<32x16xf32>
+// CHECK: %[[VAL_42:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_41]] : f32
+// CHECK: memref.store %[[VAL_42]], %[[VAL_14]]{{\[}}%[[VAL_38]], %[[VAL_40]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_43:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16xi1>
+// CHECK: return %[[VAL_43]] : tensor<32x16xi1>
+// CHECK: }
+func.func @cmp_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
+ %0 = linalg.generic #trait2
+ ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>)
+ outs(%argx: tensor<32x16xi1>) {
+ ^bb(%a: f32, %b: f32, %x: i1):
+ %0 = arith.cmpf ult, %a, %b : f32
+ linalg.yield %0 : i1
+ } -> tensor<32x16xi1>
+ return %0 : tensor<32x16xi1>
+}
+
// CHECK-LABEL: func @mul_sd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "dense" ] }>>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
@@ -392,6 +568,106 @@ func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg
return %0 : tensor<32x16xf32>
}
+// CHECK-LABEL: func.func @cmp_ss(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant false
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true
+// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xf32>
+// CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
+// CHECK: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
+// CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_16]] : memref<32x16xi1>)
+// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<?xindex>
+// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_7]]] : memref<?xindex>
+// CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_17]], %[[VAL_21:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_18]] : index
+// CHECK: scf.condition(%[[VAL_22]]) %[[VAL_20]], %[[VAL_21]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index):
+// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref<?xindex>
+// CHECK: %[[VAL_26:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index
+// CHECK: scf.if %[[VAL_26]] {
+// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_23]]] : memref<?xindex>
+// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_23]], %[[VAL_7]] : index
+// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<?xindex>
+// CHECK: %[[VAL_30:.*]]:2 = scf.while (%[[VAL_31:.*]] = %[[VAL_27]], %[[VAL_32:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_33:.*]] = arith.cmpi ult, %[[VAL_31]], %[[VAL_29]] : index
+// CHECK: scf.condition(%[[VAL_33]]) %[[VAL_31]], %[[VAL_32]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_34:.*]]: index, %[[VAL_35:.*]]: index):
+// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_34]]] : memref<?xindex>
+// CHECK: %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_35]] : index
+// CHECK: scf.if %[[VAL_37]] {
+// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref<?xf32>
+// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xf32>
+// CHECK: %[[VAL_40:.*]] = arith.cmpf ult, %[[VAL_38]], %[[VAL_39]] : f32
+// CHECK: memref.store %[[VAL_40]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xi1>
+// CHECK: } else {
+// CHECK: scf.if %[[VAL_8]] {
+// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xf32>
+// CHECK: %[[VAL_42:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_41]] : f32
+// CHECK: memref.store %[[VAL_42]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_35]]] : memref<32x16xi1>
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_43:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_35]] : index
+// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_34]], %[[VAL_7]] : index
+// CHECK: %[[VAL_45:.*]] = arith.select %[[VAL_43]], %[[VAL_44]], %[[VAL_34]] : index
+// CHECK: %[[VAL_46:.*]] = arith.addi %[[VAL_35]], %[[VAL_7]] : index
+// CHECK: scf.yield %[[VAL_45]], %[[VAL_46]] : index, index
+// CHECK: } attributes
+// CHECK: scf.for %[[VAL_47:.*]] = %[[VAL_48:.*]]#1 to %[[VAL_4]] step %[[VAL_7]] {
+// CHECK: %[[VAL_49:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_47]]] : memref<32x16xf32>
+// CHECK: %[[VAL_50:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_49]] : f32
+// CHECK: memref.store %[[VAL_50]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_47]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: } else {
+// CHECK: scf.if %[[VAL_8]] {
+// CHECK: scf.for %[[VAL_51:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
+// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_24]], %[[VAL_51]]] : memref<32x16xf32>
+// CHECK: %[[VAL_53:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_52]] : f32
+// CHECK: memref.store %[[VAL_53]], %[[VAL_16]]{{\[}}%[[VAL_24]], %[[VAL_51]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_54:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_24]] : index
+// CHECK: %[[VAL_55:.*]] = arith.addi %[[VAL_23]], %[[VAL_7]] : index
+// CHECK: %[[VAL_56:.*]] = arith.select %[[VAL_54]], %[[VAL_55]], %[[VAL_23]] : index
+// CHECK: %[[VAL_57:.*]] = arith.addi %[[VAL_24]], %[[VAL_7]] : index
+// CHECK: scf.yield %[[VAL_56]], %[[VAL_57]] : index, index
+// CHECK: } attributes
+// CHECK: scf.for %[[VAL_58:.*]] = %[[VAL_59:.*]]#1 to %[[VAL_3]] step %[[VAL_7]] {
+// CHECK: scf.for %[[VAL_60:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
+// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_58]], %[[VAL_60]]] : memref<32x16xf32>
+// CHECK: %[[VAL_62:.*]] = arith.cmpf ult, %[[VAL_9]], %[[VAL_61]] : f32
+// CHECK: memref.store %[[VAL_62]], %[[VAL_16]]{{\[}}%[[VAL_58]], %[[VAL_60]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_63:.*]] = bufferization.to_tensor %[[VAL_16]] : memref<32x16xi1>
+// CHECK: return %[[VAL_63]] : tensor<32x16xi1>
+// CHECK: }
+func.func @cmp_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
+ %0 = linalg.generic #trait2
+ ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>)
+ outs(%argx: tensor<32x16xi1>) {
+ ^bb(%a: f32, %b: f32, %x: i1):
+ %0 = arith.cmpf ult, %a, %b : f32
+ linalg.yield %0 : i1
+ } -> tensor<32x16xi1>
+ return %0 : tensor<32x16xi1>
+}
+
// CHECK-LABEL: func @mul_ss(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>,
@@ -599,6 +875,180 @@ func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
return %0 : tensor<32x16xf32>
}
+// CHECK-LABEL: func.func @cmp_ss_ss(
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>,
+// CHECK-SAME: %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant false
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
+// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xf32>
+// CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1>
+// CHECK: linalg.fill ins(%[[VAL_3]] : i1) outs(%[[VAL_17]] : memref<32x16xi1>)
+// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref<?xindex>
+// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<?xindex>
+// CHECK: %[[VAL_22:.*]]:2 = scf.while (%[[VAL_23:.*]] = %[[VAL_18]], %[[VAL_24:.*]] = %[[VAL_20]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_25:.*]] = arith.cmpi ult, %[[VAL_23]], %[[VAL_19]] : index
+// CHECK: %[[VAL_26:.*]] = arith.cmpi ult, %[[VAL_24]], %[[VAL_21]] : index
+// CHECK: %[[VAL_27:.*]] = arith.andi %[[VAL_25]], %[[VAL_26]] : i1
+// CHECK: scf.condition(%[[VAL_27]]) %[[VAL_23]], %[[VAL_24]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_28:.*]]: index, %[[VAL_29:.*]]: index):
+// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_28]]] : memref<?xindex>
+// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<?xindex>
+// CHECK: %[[VAL_32:.*]] = arith.cmpi ult, %[[VAL_31]], %[[VAL_30]] : index
+// CHECK: %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_31]], %[[VAL_30]] : index
+// CHECK: %[[VAL_34:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_33]] : index
+// CHECK: %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_33]] : index
+// CHECK: %[[VAL_36:.*]] = arith.andi %[[VAL_34]], %[[VAL_35]] : i1
+// CHECK: scf.if %[[VAL_36]] {
+// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_28]]] : memref<?xindex>
+// CHECK: %[[VAL_38:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index
+// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_38]]] : memref<?xindex>
+// CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_29]]] : memref<?xindex>
+// CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index
+// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_41]]] : memref<?xindex>
+// CHECK: %[[VAL_43:.*]]:2 = scf.while (%[[VAL_44:.*]] = %[[VAL_37]], %[[VAL_45:.*]] = %[[VAL_40]]) : (index, index) -> (index, index) {
+// CHECK: %[[VAL_46:.*]] = arith.cmpi ult, %[[VAL_44]], %[[VAL_39]] : index
+// CHECK: %[[VAL_47:.*]] = arith.cmpi ult, %[[VAL_45]], %[[VAL_42]] : index
+// CHECK: %[[VAL_48:.*]] = arith.andi %[[VAL_46]], %[[VAL_47]] : i1
+// CHECK: scf.condition(%[[VAL_48]]) %[[VAL_44]], %[[VAL_45]] : index, index
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_49:.*]]: index, %[[VAL_50:.*]]: index):
+// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_49]]] : memref<?xindex>
+// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_50]]] : memref<?xindex>
+// CHECK: %[[VAL_53:.*]] = arith.cmpi ult, %[[VAL_52]], %[[VAL_51]] : index
+// CHECK: %[[VAL_54:.*]] = arith.select %[[VAL_53]], %[[VAL_52]], %[[VAL_51]] : index
+// CHECK: %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_51]], %[[VAL_54]] : index
+// CHECK: %[[VAL_56:.*]] = arith.cmpi eq, %[[VAL_52]], %[[VAL_54]] : index
+// CHECK: %[[VAL_57:.*]] = arith.andi %[[VAL_55]], %[[VAL_56]] : i1
+// CHECK: scf.if %[[VAL_57]] {
+// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_49]]] : memref<?xf32>
+// CHECK: %[[VAL_59:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_50]]] : memref<?xf32>
+// CHECK: %[[VAL_60:.*]] = arith.cmpf ult, %[[VAL_58]], %[[VAL_59]] : f32
+// CHECK: memref.store %[[VAL_60]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_54]]] : memref<32x16xi1>
+// CHECK: } else {
+// CHECK: %[[VAL_61:.*]] = arith.cmpi eq, %[[VAL_51]], %[[VAL_54]] : index
+// CHECK: scf.if %[[VAL_61]] {
+// CHECK: %[[VAL_62:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_49]]] : memref<?xf32>
+// CHECK: %[[VAL_63:.*]] = arith.cmpf ult, %[[VAL_62]], %[[VAL_6]] : f32
+// CHECK: memref.store %[[VAL_63]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_54]]] : memref<32x16xi1>
+// CHECK: } else {
+// CHECK: %[[VAL_64:.*]] = arith.cmpi eq, %[[VAL_52]], %[[VAL_54]] : index
+// CHECK: scf.if %[[VAL_64]] {
+// CHECK: %[[VAL_65:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_50]]] : memref<?xf32>
+// CHECK: %[[VAL_66:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_65]] : f32
+// CHECK: memref.store %[[VAL_66]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_54]]] : memref<32x16xi1>
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_67:.*]] = arith.cmpi eq, %[[VAL_51]], %[[VAL_54]] : index
+// CHECK: %[[VAL_68:.*]] = arith.addi %[[VAL_49]], %[[VAL_5]] : index
+// CHECK: %[[VAL_69:.*]] = arith.select %[[VAL_67]], %[[VAL_68]], %[[VAL_49]] : index
+// CHECK: %[[VAL_70:.*]] = arith.cmpi eq, %[[VAL_52]], %[[VAL_54]] : index
+// CHECK: %[[VAL_71:.*]] = arith.addi %[[VAL_50]], %[[VAL_5]] : index
+// CHECK: %[[VAL_72:.*]] = arith.select %[[VAL_70]], %[[VAL_71]], %[[VAL_50]] : index
+// CHECK: scf.yield %[[VAL_69]], %[[VAL_72]] : index, index
+// CHECK: } attributes
+// CHECK: scf.for %[[VAL_73:.*]] = %[[VAL_74:.*]]#0 to %[[VAL_39]] step %[[VAL_5]] {
+// CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_73]]] : memref<?xindex>
+// CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_73]]] : memref<?xf32>
+// CHECK: %[[VAL_77:.*]] = arith.cmpf ult, %[[VAL_76]], %[[VAL_6]] : f32
+// CHECK: memref.store %[[VAL_77]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_75]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: scf.for %[[VAL_78:.*]] = %[[VAL_79:.*]]#1 to %[[VAL_42]] step %[[VAL_5]] {
+// CHECK: %[[VAL_80:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_78]]] : memref<?xindex>
+// CHECK: %[[VAL_81:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_78]]] : memref<?xf32>
+// CHECK: %[[VAL_82:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_81]] : f32
+// CHECK: memref.store %[[VAL_82]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_80]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: } else {
+// CHECK: %[[VAL_83:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_33]] : index
+// CHECK: scf.if %[[VAL_83]] {
+// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_28]]] : memref<?xindex>
+// CHECK: %[[VAL_85:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index
+// CHECK: %[[VAL_86:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_85]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_87:.*]] = %[[VAL_84]] to %[[VAL_86]] step %[[VAL_5]] {
+// CHECK: %[[VAL_88:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_87]]] : memref<?xindex>
+// CHECK: %[[VAL_89:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_87]]] : memref<?xf32>
+// CHECK: %[[VAL_90:.*]] = arith.cmpf ult, %[[VAL_89]], %[[VAL_6]] : f32
+// CHECK: memref.store %[[VAL_90]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_88]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: } else {
+// CHECK: %[[VAL_91:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_33]] : index
+// CHECK: scf.if %[[VAL_91]] {
+// CHECK: %[[VAL_92:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_29]]] : memref<?xindex>
+// CHECK: %[[VAL_93:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index
+// CHECK: %[[VAL_94:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_93]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_95:.*]] = %[[VAL_92]] to %[[VAL_94]] step %[[VAL_5]] {
+// CHECK: %[[VAL_96:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_95]]] : memref<?xindex>
+// CHECK: %[[VAL_97:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_95]]] : memref<?xf32>
+// CHECK: %[[VAL_98:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_97]] : f32
+// CHECK: memref.store %[[VAL_98]], %[[VAL_17]]{{\[}}%[[VAL_33]], %[[VAL_96]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: } else {
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_99:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_33]] : index
+// CHECK: %[[VAL_100:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index
+// CHECK: %[[VAL_101:.*]] = arith.select %[[VAL_99]], %[[VAL_100]], %[[VAL_28]] : index
+// CHECK: %[[VAL_102:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_33]] : index
+// CHECK: %[[VAL_103:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index
+// CHECK: %[[VAL_104:.*]] = arith.select %[[VAL_102]], %[[VAL_103]], %[[VAL_29]] : index
+// CHECK: scf.yield %[[VAL_101]], %[[VAL_104]] : index, index
+// CHECK: } attributes
+// CHECK: scf.for %[[VAL_105:.*]] = %[[VAL_106:.*]]#0 to %[[VAL_19]] step %[[VAL_5]] {
+// CHECK: %[[VAL_107:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_105]]] : memref<?xindex>
+// CHECK: %[[VAL_108:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_105]]] : memref<?xindex>
+// CHECK: %[[VAL_109:.*]] = arith.addi %[[VAL_105]], %[[VAL_5]] : index
+// CHECK: %[[VAL_110:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_109]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_111:.*]] = %[[VAL_108]] to %[[VAL_110]] step %[[VAL_5]] {
+// CHECK: %[[VAL_112:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_111]]] : memref<?xindex>
+// CHECK: %[[VAL_113:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_111]]] : memref<?xf32>
+// CHECK: %[[VAL_114:.*]] = arith.cmpf ult, %[[VAL_113]], %[[VAL_6]] : f32
+// CHECK: memref.store %[[VAL_114]], %[[VAL_17]]{{\[}}%[[VAL_107]], %[[VAL_112]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: }
+// CHECK: scf.for %[[VAL_115:.*]] = %[[VAL_116:.*]]#1 to %[[VAL_21]] step %[[VAL_5]] {
+// CHECK: %[[VAL_117:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_115]]] : memref<?xindex>
+// CHECK: %[[VAL_118:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_115]]] : memref<?xindex>
+// CHECK: %[[VAL_119:.*]] = arith.addi %[[VAL_115]], %[[VAL_5]] : index
+// CHECK: %[[VAL_120:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_119]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_121:.*]] = %[[VAL_118]] to %[[VAL_120]] step %[[VAL_5]] {
+// CHECK: %[[VAL_122:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_121]]] : memref<?xindex>
+// CHECK: %[[VAL_123:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_121]]] : memref<?xf32>
+// CHECK: %[[VAL_124:.*]] = arith.cmpf ult, %[[VAL_6]], %[[VAL_123]] : f32
+// CHECK: memref.store %[[VAL_124]], %[[VAL_17]]{{\[}}%[[VAL_117]], %[[VAL_122]]] : memref<32x16xi1>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_125:.*]] = bufferization.to_tensor %[[VAL_17]] : memref<32x16xi1>
+// CHECK: return %[[VAL_125]] : tensor<32x16xi1>
+// CHECK: }
+func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xi1>) -> tensor<32x16xi1> {
+ %0 = linalg.generic #trait2
+ ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>)
+ outs(%argx: tensor<32x16xi1>) {
+ ^bb(%a: f32, %b: f32, %x: i1):
+ %0 = arith.cmpf ult, %a, %b : f32
+ linalg.yield %0 : i1
+ } -> tensor<32x16xi1>
+ return %0 : tensor<32x16xi1>
+}
+
#BatchedVector = #sparse_tensor.encoding<{
lvlTypes = [ "dense", "compressed-hi" ],
}>
@@ -671,22 +1121,22 @@ func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
// CHECK: %[[VAL_60:.*]] = arith.addi %[[VAL_31]], %[[VAL_4]] : index
// CHECK: %[[VAL_61:.*]] = arith.select %[[VAL_59]], %[[VAL_60]], %[[VAL_31]] : index
// CHECK: scf.yield %[[VAL_58]], %[[VAL_61]], %[[VAL_62:.*]] : index, index, tensor<2x3xf64, #{{.*}}>>
-// CHECK: } attributes {"Emitted from" = "linalg.generic"}
+// CHECK: } attributes
// CHECK: %[[VAL_63:.*]] = scf.for %[[VAL_64:.*]] = %[[VAL_65:.*]]#0 to %[[VAL_18]] step %[[VAL_4]] iter_args(%[[VAL_66:.*]] = %[[VAL_65]]#2)
// CHECK: %[[VAL_67:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_64]]] : memref<?xindex>
// CHECK: %[[VAL_68:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_64]]] : memref<?xf64>
// CHECK: %[[VAL_69:.*]] = sparse_tensor.insert %[[VAL_68]] into %[[VAL_66]]{{\[}}%[[VAL_13]], %[[VAL_67]]] : tensor<2x3xf64, #{{.*}}>>
// CHECK: scf.yield %[[VAL_69]] : tensor<2x3xf64, #{{.*}}>>
-// CHECK: } {"Emitted from" = "linalg.generic"}
+// CHECK: }
// CHECK: %[[VAL_70:.*]] = scf.for %[[VAL_71:.*]] = %[[VAL_72:.*]]#1 to %[[VAL_22]] step %[[VAL_4]] iter_args(%[[VAL_73:.*]] = %[[VAL_74:.*]])
// CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_71]]] : memref<?xindex>
// CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_71]]] : memref<?xf64>
// CHECK: %[[VAL_77:.*]] = arith.negf %[[VAL_76]] : f64
// CHECK: %[[VAL_78:.*]] = sparse_tensor.insert %[[VAL_77]] into %[[VAL_73]]{{\[}}%[[VAL_13]], %[[VAL_75]]] : tensor<2x3xf64, #{{.*}}>>
// CHECK: scf.yield %[[VAL_78]] : tensor<2x3xf64, #{{.*}}>>
-// CHECK: } {"Emitted from" = "linalg.generic"}
+// CHECK: }
// CHECK: scf.yield %[[VAL_79:.*]] : tensor<2x3xf64, #{{.*}}>>
-// CHECK: } {"Emitted from" = "linalg.generic"}
+// CHECK: }
// CHECK: %[[VAL_80:.*]] = sparse_tensor.load %[[VAL_81:.*]] hasInserts : tensor<2x3xf64, #{{.*}}>>
// CHECK: return %[[VAL_80]] : tensor<2x3xf64, #{{.*}}>>
// CHECK: }
@@ -1140,9 +1590,9 @@ func.func @scale(%arga: tensor<?x?xf64, #Tds>, %argx: tensor<?x?xf64>) -> tensor
// CHECK: %[[VAL_30:.*]] = arith.mulf %[[VAL_27]], %[[VAL_29]] : f32
// CHECK: %[[VAL_31:.*]] = arith.addf %[[VAL_26]], %[[VAL_30]] : f32
// CHECK: memref.store %[[VAL_31]], %[[VAL_14]]{{\[}}%[[VAL_18]], %[[VAL_25]]] : memref<?x?xf32>
-// CHECK: } {"Emitted from" = "linalg.generic"}
-// CHECK: } {"Emitted from" = "linalg.generic"}
-// CHECK: } {"Emitted from" = "linalg.generic"}
+// CHECK: }
+// CHECK: }
+// CHECK: }
// CHECK: %[[VAL_32:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<?x?xf32>
// CHECK: return %[[VAL_32]] : tensor<?x?xf32>
// CHECK: }
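Taken together, the @cmp_ss_ss CHECK lines above exercise three forms of the
comparison, chosen by which operand has a stored value at the current
coordinate. A minimal scalar sketch of that pattern (the names %a, %b and the
zero constant %f0 are hypothetical placeholders for the loaded values and the
synthetic zero):

  %f0 = arith.constant 0.0 : f32
  // Coordinate stored in both operands: compare the two loaded values.
  %both = arith.cmpf ult, %a, %b : f32
  // Coordinate stored only in the left operand: zero on the right-hand side.
  %lhs_only = arith.cmpf ult, %a, %f0 : f32
  // Coordinate stored only in the right operand: zero on the left-hand side.
  %rhs_only = arith.cmpf ult, %f0, %b : f32

Only the overlap case reads both tensors; the remainder loops substitute the
zero constant, so the kernel can still yield true at coordinates where one
operand has no stored entry.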
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cmp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cmp.mlir
new file mode 100644
index 0000000000000..33861a79f6805
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cmp.mlir
@@ -0,0 +1,146 @@
+// DEFINE: %{option} = "enable-runtime-library=false"
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler=%{option}
+// DEFINE: %{run} = mlir-cpu-runner \
+// DEFINE: -e entry -entry-point-result=void \
+// DEFINE: -shared-libs=%mlir_c_runner_utils | \
+// DEFINE: FileCheck %s
+//
+// RUN: %{compile} | %{run}
+//
+// Do the same run, but now with direct IR generation and vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"
+// RUN: %{compile} | %{run}
+
+// Do the same run, but now with direct IR generation and, if available, VLA
+// vectorization.
+// REDEFINE: %{option} = "enable-runtime-library=false vl=4 enable-arm-sve=%ENABLE_VLA"
+// REDEFINE: %{run} = %lli_host_or_aarch64_cmd \
+// REDEFINE: --entry-function=entry_lli \
+// REDEFINE: --extra-module=%S/Inputs/main_for_lli.ll \
+// REDEFINE: %VLA_ARCH_ATTR_OPTIONS \
+// REDEFINE: --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// REDEFINE: FileCheck %s
+// RUN: %{compile} | mlir-translate -mlir-to-llvmir | %{run}
+
+#DCSR = #sparse_tensor.encoding<{
+ lvlTypes = [ "compressed", "compressed" ]
+}>
+
+#trait = {
+ indexing_maps = [
+ affine_map<(i,j) -> (i,j)>, // A
+ affine_map<(i,j) -> (i,j)>, // B
+ affine_map<(i,j) -> (i,j)> // x (out)
+ ],
+ iterator_types = ["parallel", "parallel"],
+ doc = "X(i, j) = cmp A(i,j) B(i, j)"
+}
+
+//
+// Integration test that lowers a kernel annotated as sparse to
+// actual sparse code, initializes matching sparse storage schemes,
+// and runs the resulting code with the JIT compiler.
+//
+module {
+ func.func @cmp_all_dense(%arga: tensor<4x4xf64>,
+ %argb: tensor<4x4xf64>,
+ %argx: tensor<4x4xi8>) -> tensor<4x4xi8> {
+ %0 = linalg.generic #trait
+ ins(%arga, %argb: tensor<4x4xf64>, tensor<4x4xf64>)
+ outs(%argx: tensor<4x4xi8>) {
+ ^bb(%a: f64, %b: f64, %x: i8):
+ %0 = arith.cmpf ult, %a, %b : f64
+ %1 = arith.extui %0 : i1 to i8
+ linalg.yield %1 : i8
+ } -> tensor<4x4xi8>
+ return %0 : tensor<4x4xi8>
+ }
+
+ func.func @cmp_lhs_sparse(%arga: tensor<4x4xf64, #DCSR>,
+ %argb: tensor<4x4xf64>) -> tensor<4x4xi8, #DCSR> {
+ %argx = bufferization.alloc_tensor() : tensor<4x4xi8, #DCSR>
+ %0 = linalg.generic #trait
+ ins(%arga, %argb: tensor<4x4xf64, #DCSR>, tensor<4x4xf64>)
+ outs(%argx: tensor<4x4xi8, #DCSR>) {
+ ^bb(%a: f64, %b: f64, %x: i8):
+ %0 = arith.cmpf ult, %a, %b : f64
+ %1 = arith.extui %0 : i1 to i8
+ linalg.yield %1 : i8
+ } -> tensor<4x4xi8, #DCSR>
+ return %0 : tensor<4x4xi8, #DCSR>
+ }
+
+ func.func @cmp_all_sparse(%arga: tensor<4x4xf64, #DCSR>,
+ %argb: tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR> {
+ %argx = bufferization.alloc_tensor() : tensor<4x4xi8, #DCSR>
+ %0 = linalg.generic #trait
+ ins(%arga, %argb: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
+ outs(%argx: tensor<4x4xi8, #DCSR>) {
+ ^bb(%a: f64, %b: f64, %x: i8):
+ %0 = arith.cmpf ult, %a, %b : f64
+ %1 = arith.extui %0 : i1 to i8
+ linalg.yield %1 : i8
+ } -> tensor<4x4xi8, #DCSR>
+ return %0 : tensor<4x4xi8, #DCSR>
+ }
+
+ //
+ // Main driver that constructs matrix and calls the sparse kernel to perform
+ // element-wise comparison.
+ //
+ func.func @entry() {
+ %d0 = arith.constant 0 : i8
+ %c0 = arith.constant 0 : index
+
+ %lhs_dn = arith.constant dense<
+ [ [ 0.0, 0.0, 1.5, 1.0],
+ [ 0.0, 3.5, 0.0, 0.0],
+ [ 1.0, 5.0, 2.0, 0.0],
+ [ 1.0, 0.5, 0.0, 0.0] ]> : tensor<4x4xf64>
+
+ %rhs_dn = arith.constant dense<
+ [ [ 0.0, 1.5, 1.0, 1.5],
+ [ 3.5, 0.0, 0.0, 0.0],
+ [ 5.0, 2.0, 0.0, 2.0],
+ [ 0.5, 0.0, 0.0, 0.0] ]> : tensor<4x4xf64>
+
+ %lhs_sp = sparse_tensor.convert %lhs_dn : tensor<4x4xf64> to tensor<4x4xf64, #DCSR>
+ %rhs_sp = sparse_tensor.convert %rhs_dn : tensor<4x4xf64> to tensor<4x4xf64, #DCSR>
+
+ %output = arith.constant dense<0> : tensor<4x4xi8>
+ %all_dn_out = call @cmp_all_dense(%lhs_dn, %rhs_dn, %output)
+ : (tensor<4x4xf64>, tensor<4x4xf64>, tensor<4x4xi8>) -> tensor<4x4xi8>
+ %lhs_sp_out = call @cmp_lhs_sparse(%lhs_sp, %rhs_dn)
+ : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64>) -> tensor<4x4xi8, #DCSR>
+ %all_sp_out = call @cmp_all_sparse(%lhs_sp, %rhs_sp)
+ : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR>
+
+ //
+ // All should have the same result.
+ //
+ // CHECK-COUNT-3: ( ( 0, 1, 0, 1 ), ( 1, 0, 0, 0 ), ( 1, 0, 0, 1 ), ( 0, 0, 0, 0 ) )
+ %v = vector.transfer_read %all_dn_out[%c0, %c0], %d0
+ : tensor<4x4xi8>, vector<4x4xi8>
+ vector.print %v : vector<4x4xi8>
+
+ %lhs_sp_ret = sparse_tensor.convert %lhs_sp_out
+ : tensor<4x4xi8, #DCSR> to tensor<4x4xi8>
+ %v1 = vector.transfer_read %lhs_sp_ret[%c0, %c0], %d0
+ : tensor<4x4xi8>, vector<4x4xi8>
+ vector.print %v1 : vector<4x4xi8>
+
+ %rhs_sp_ret = sparse_tensor.convert %all_sp_out
+ : tensor<4x4xi8, #DCSR> to tensor<4x4xi8>
+ %v2 = vector.transfer_read %rhs_sp_ret[%c0, %c0], %d0
+ : tensor<4x4xi8>, vector<4x4xi8>
+ vector.print %v2 : vector<4x4xi8>
+
+
+ bufferization.dealloc_tensor %lhs_sp : tensor<4x4xf64, #DCSR>
+ bufferization.dealloc_tensor %rhs_sp : tensor<4x4xf64, #DCSR>
+ bufferization.dealloc_tensor %lhs_sp_out : tensor<4x4xi8, #DCSR>
+ bufferization.dealloc_tensor %all_sp_out : tensor<4x4xi8, #DCSR>
+
+ return
+ }
+}
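As a quick sanity check on the expected output: the first row compares the lhs
values (0.0, 0.0, 1.5, 1.0) against the rhs values (0.0, 1.5, 1.0, 1.5) with
ult, which after extui gives (0, 1, 0, 1), the first row of the CHECK-COUNT-3
vector. The sparse kernels can only skip coordinates where both operands are
zero, and 0 < 0 is false anyway, so converting their results back to dense form
yields the same matrix as the all-dense kernel.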
diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
index 38c3b363b585f..b854ce8d7aa8a 100644
--- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
+++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
@@ -34,7 +34,9 @@ namespace {
DO(subi, TensorExp::Kind::kSubI) \
DO(andi, TensorExp::Kind::kAndI) \
DO(xori, TensorExp::Kind::kXorI) \
- DO(ori, TensorExp::Kind::kOrI)
+ DO(ori, TensorExp::Kind::kOrI) \
+ DO(cmpf, TensorExp::Kind::kCmpF) \
+ DO(cmpi, TensorExp::Kind::kCmpI)
// TODO: Disjunctive binary operations that need special handling are not
// included, e.g., division is not tested (for now) as it needs a constant
@@ -109,6 +111,7 @@ struct Pattern {
/// Constructors.
/// Rather than using these, please use the readable helper constructor
/// functions below to make tests more readable.
+ Pattern() : kind(TensorExp::Kind::kSynZero) {}
Pattern(TensorId tid) : kind(TensorExp::Kind::kTensor), tid(tid) {}
Pattern(TensorExp::Kind kind, PatternRef e0, PatternRef e1)
: kind(kind), children(e0, e1) {
@@ -122,6 +125,7 @@ struct Pattern {
///
static Pattern tensorPattern(TensorId tid) { return Pattern(tid); }
+static Pattern synZeroPattern() { return Pattern(); }
#define IMPL_BINOP_PATTERN(OP, KIND) \
LLVM_ATTRIBUTE_UNUSED static Pattern OP##Pattern(PatternRef e0, \
@@ -232,6 +236,9 @@ class MergerTestBase : public ::testing::Test {
// Leaf.
case TensorExp::Kind::kTensor:
return tensorExp.tensor == pattern.tid;
+ case TensorExp::Kind::kSynZero:
+ // Already checked kind equivalence @L233
+ return true;
case TensorExp::Kind::kInvariant:
llvm_unreachable("invariant not handled yet");
case TensorExp::Kind::kLoopVar:
@@ -289,6 +296,8 @@ class MergerTestBase : public ::testing::Test {
case TensorExp::Kind::kAndI:
case TensorExp::Kind::kOrI:
case TensorExp::Kind::kXorI:
+ case TensorExp::Kind::kCmpF:
+ case TensorExp::Kind::kCmpI:
case TensorExp::Kind::kShrS:
case TensorExp::Kind::kShrU:
case TensorExp::Kind::kShlI:
@@ -752,6 +761,79 @@ FOREVERY_COMMON_DISJ_BINOP(IMPL_MERGER_TEST_OPTIMIZED_DISJ)
FOREVERY_COMMON_CONJ_BINOP(IMPL_MERGER_TEST_OPTIMIZED_CONJ)
+/// Vector element-wise comparison (disjunction) of two vectors, i.e.,
+///   a(i) = b(i) cmp c(i)
+/// which should form the 3 lattice points
+/// {
+/// lat( i_00 i_01 / (tensor_0 cmp tensor_1) )
+/// lat( i_00 / tensor_0 cmp 0 )
+/// lat( i_01 / 0 cmp tensor_1 )
+/// }
+/// and after optimization the lattice points do not change (there is no
+/// duplicated point, and all input vectors are sparse).
+/// {
+/// lat( i_00 i_01 / (tensor_0 cmp tensor_1) )
+/// lat( i_00 / tensor_0 cmp 0 )
+/// lat( i_01 / 0 cmp tensor_1 )
+/// }
+TEST_F(MergerTest3T1L, vector_cmp) {
+ const auto e = cmpiExpr(tensor(0), tensor(1));
+ const auto l0 = lid(0);
+ const auto t0 = tid(0);
+ const auto t1 = tid(1);
+ PatternRef zero = synZeroPattern();
+ PatternRef p0 = tensorPattern(t0);
+ PatternRef p1 = tensorPattern(t1);
+ auto s = merger.buildLattices(e, l0);
+ expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}));
+ expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero),
+ loopsToBits({{l0, t0}}));
+ expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1),
+ loopsToBits({{l0, t1}}));
+ s = merger.optimizeSet(s);
+ expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}));
+ expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero),
+ loopsToBits({{l0, t0}}));
+ expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1),
+ loopsToBits({{l0, t1}}));
+}
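These three lattice points mirror the three comparison forms generated for
@cmp_ss_ss in sparse_2d.mlir earlier in this patch: the overlap branch compares
the two stored values, and the two remainder loops compare a stored value
against the synthetic zero constant.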
+
+/// Vector element-wise comparison (disjunction) of two vectors, i.e.,
+/// a(i) = b(i) cmp c(i)
+/// which should form the 3 lattice points
+/// {
+/// lat( i_00 i_01 / (sparse_tensor_0 cmp dense_tensor_1) )
+/// lat( i_00 / sparse_tensor_0 cmp 0)
+/// lat( i_01 / 0 cmp dense_tensor_1 )
+/// }
+/// which should be optimized to
+/// {
+/// lat( i_00 i_01 / (sparse_tensor_0 cmp dense_tensor_1) ) (not singleton)
+///   lat( i_01 / 0 cmp dense_tensor_1 )
+/// }
+///
+/// lat( i_00 / sparse_tensor_0 cmp 0 ) should be opted out, as it differs from
+/// lat( i_00 i_01 / (sparse_tensor_0 cmp dense_tensor_1) ) only in the dense
+/// operand.
+TEST_F(MergerTest3T1LD, vector_cmp) {
+ const auto e = cmpiExpr(tensor(0), tensor(1));
+ const auto l0 = lid(0);
+ const auto t0 = tid(0);
+ const auto t1 = tid(1);
+ PatternRef zero = synZeroPattern();
+ PatternRef p0 = tensorPattern(t0);
+ PatternRef p1 = tensorPattern(t1);
+ auto s = merger.buildLattices(e, l0);
+ expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}));
+ expectLatPointWithinRange(s, 1, 2, cmpiPattern(p0, zero),
+ loopsToBits({{l0, t0}}));
+ expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1),
+ loopsToBits({{l0, t1}}));
+ s = merger.optimizeSet(s);
+ expectLatPoint(s, 0, cmpiPattern(p0, p1), loopsToBits({{l0, t0}, {l0, t1}}));
+ expectLatPointWithinRange(s, 1, 2, cmpiPattern(zero, p1),
+ loopsToBits({{l0, t1}}));
+}
+
#undef IMPL_MERGER_TEST_OPTIMIZED_CONJ
// TODO: mult-dim tests