[Mlir-commits] [mlir] fb28733 - [mlir][sparse] support affine expression on dense dimensions (except constant affine)
Peiming Liu
llvmlistbot at llvm.org
Mon Nov 21 17:31:25 PST 2022
Author: Peiming Liu
Date: 2022-11-22T01:31:19Z
New Revision: fb287335419a9d2976d4ed3ae74bcc6cdd0565c3
URL: https://github.com/llvm/llvm-project/commit/fb287335419a9d2976d4ed3ae74bcc6cdd0565c3
DIFF: https://github.com/llvm/llvm-project/commit/fb287335419a9d2976d4ed3ae74bcc6cdd0565c3.diff
LOG: [mlir][sparse] support affine expression on dense dimensions (except constant affine)
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D138169
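For illustration, the kernel shape this patch enables is a linalg.generic that addresses a dense level of an annotated sparse tensor through a compound affine expression such as j+3. The sketch below is a minimal variant of the test added in this patch; the #Row encoding is taken verbatim from sparse_affine.mlir, while the #shift trait name, the @shift_dense_dim function name, and the concrete tensor sizes are illustrative only.

#Row = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>

#shift = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j+3)>,  // b: the dense level is addressed as j+3
    affine_map<(i,j) -> (i,j)>     // x (out)
  ],
  iterator_types = ["parallel","parallel"],
  doc = "x(i,j) = b(i,j+3)"
}

func.func @shift_dense_dim(%argb: tensor<32x19xf64, #Row>,
                           %argx: tensor<32x16xf64>) -> tensor<32x16xf64> {
  %0 = linalg.generic #shift
    ins(%argb: tensor<32x19xf64, #Row>)
    outs(%argx: tensor<32x16xf64>) {
    ^bb(%b: f64, %x: f64):
      linalg.yield %b : f64
  } -> tensor<32x16xf64>
  return %0 : tensor<32x16xf64>
}

After sparsification, the address of the dense level is computed from the affine expression (roughly i * 19 + (j + 3)) rather than requiring a trivial dimension index, as shown by the CHECK lines of the new @mul_affine_dense_dim_2d test below.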
Added:
Modified:
mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
mlir/test/Dialect/SparseTensor/sparse_affine.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
index dc27e85d3e716..7f9f0e9f0e0d2 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
@@ -156,8 +156,12 @@ class Merger {
Merger(unsigned t, unsigned l)
: outTensor(t - 1), syntheticTensor(t), numTensors(t + 1), numLoops(l),
hasSparseOut(false),
- dimTypes(t + 1, std::vector<DimLevelType>(l, DimLevelType::Undef)),
- loopIdxToDim(t + 1, std::vector<Optional<unsigned>>(l, llvm::None)) {}
+ dimTypes(numTensors,
+ std::vector<DimLevelType>(numLoops, DimLevelType::Undef)),
+ loopIdxToDim(numTensors,
+ std::vector<Optional<unsigned>>(numLoops, llvm::None)),
+ dimToLoopIdx(numTensors,
+ std::vector<Optional<unsigned>>(numLoops, llvm::None)) {}
/// Adds a tensor expression. Returns its index.
unsigned addExp(Kind k, unsigned e0, unsigned e1 = -1u, Value v = Value(),
@@ -258,6 +262,11 @@ class Merger {
return getDimLevelType(tensor(b), index(b));
}
+ Optional<unsigned> getLoopIdx(unsigned t, unsigned dim) const {
+ assert(t < numTensors && dim < numLoops);
+ return dimToLoopIdx[t][dim];
+ }
+
/// Gets the dimension number of the `t`th tensor on the `i`th loop.
Optional<unsigned> getDimNum(unsigned t, unsigned i) const {
assert(t < numTensors && i < numLoops);
@@ -276,6 +285,8 @@ class Merger {
assert(isValidDLT(dlt));
dimTypes[t][i] = dlt;
loopIdxToDim[t][i] = dim;
+ assert(dim < numLoops);
+ dimToLoopIdx[t][dim] = i;
}
// Iterates the bits of a lattice, for each set bit, converts it into the
@@ -341,6 +352,8 @@ class Merger {
std::vector<std::vector<DimLevelType>> dimTypes;
// Map that converts pair<tensor id, loop id> to the corresponding dimension.
std::vector<std::vector<Optional<unsigned>>> loopIdxToDim;
+ // Map that converts pair<tensor id, dim> to the corresponding loop id.
+ std::vector<std::vector<Optional<unsigned>>> dimToLoopIdx;
llvm::SmallVector<TensorExp> tensorExps;
llvm::SmallVector<LatPoint> latPoints;
llvm::SmallVector<SmallVector<unsigned>> latSets;
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index 4460c1840281b..e94224db2b347 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -329,6 +329,13 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
return loop;
}
+void SparseTensorLoopEmitter::genDenseAffineAddressAtCurLevel(
+ OpBuilder &builder, Location loc, size_t tid, size_t dim,
+ AffineExpr affine) {
+ Value affineV = genAffine(builder, affine, loc);
+ pidxs[tid][dim] = genAddress(builder, loc, tid, dim, affineV);
+}
+
Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims(
OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
ArrayRef<size_t> dims, bool needsUniv, MutableArrayRef<Value> reduc,
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index 4190d58c5731e..2987490e25907 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -338,7 +338,6 @@ inline bool isZeroRankedTensorOrScalar(Type type) {
// loopEmiter.exitCurrentLoop(); // exit i
//===----------------------------------------------------------------------===//
-// TODO: Sparsification should also rely on this class to generate loops.
class SparseTensorLoopEmitter {
public:
/// Optional callback function to setup dense output tensors when
@@ -407,6 +406,10 @@ class SparseTensorLoopEmitter {
ArrayRef<size_t> extraTids = {},
ArrayRef<size_t> extraDims = {});
+ void genDenseAffineAddressAtCurLevel(OpBuilder &builder, Location loc,
+ size_t tid, size_t dim,
+ AffineExpr affine);
+
/// Emits a co-iteration loop over a set of tensors.
Operation *enterCoIterationOverTensorsAtDims(
OpBuilder &builder, Location loc, ArrayRef<size_t> tids,
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index 801f33bf2ffc0..55c9463b2d73d 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -27,6 +27,7 @@
#include "mlir/Dialect/SparseTensor/Transforms/Passes.h"
#include "mlir/Dialect/SparseTensor/Utils/Merger.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/TensorEncoding.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -98,6 +99,35 @@ struct CodeGen {
}
};
+/// A helper class that visits an affine expression and tries to find an
+/// AffineDimExpr whose corresponding iterator from a GenericOp matches
+/// the desired iterator type.
+class AffineDimFinder : public AffineExprVisitor<AffineDimFinder> {
+public:
+ explicit AffineDimFinder(linalg::GenericOp op)
+ : iterTypes(op.getIteratorTypesArray()) {}
+ void visitDimExpr(AffineDimExpr expr) {
+ if (pickedDim == nullptr || pickIterType == iterTypes[expr.getPosition()]) {
+ pickedDim = expr;
+ }
+ }
+
+ /// Set the desired iterator type that we want to pick.
+ void setPickedIterType(utils::IteratorType iterType) {
+ pickIterType = iterType;
+ }
+
+ /// Get the desired AffineDimExpr.
+ AffineDimExpr getDimExpr() const { return pickedDim.cast<AffineDimExpr>(); }
+
+private:
+ /// The picked AffineDimExpr after the visit.
+ AffineExpr pickedDim;
+ /// The iterator type that we want.
+ utils::IteratorType pickIterType;
+ /// The mapping from dim to iterator type.
+ SmallVector<utils::IteratorType> iterTypes;
+};
} // namespace
//===----------------------------------------------------------------------===//
@@ -219,25 +249,45 @@ static bool topSortOptimal(unsigned n,
/// Helper method to add all constraints from the indices in one affine
/// expression before all indices in the other affine expression. For
/// example i0+i1 < i2+i3+1 yields i0<i2, i0<i3, i1<i2, and i1<i3.
+/// The affine expression `a` is empty iff `fidx` has a value, in which case
+/// fidx < b = (i0 + i1) yields fidx < i0 and fidx < i1.
+/// The affine expression `b` is empty iff `tidx` has a value, in which case
+/// a = (i0 + i1) < tidx yields i0 < tidx and i1 < tidx.
static void addAffineOrderings(std::vector<std::vector<bool>> &adjM,
std::vector<unsigned> &inDegree, AffineExpr a,
- AffineExpr b, unsigned fidx) {
- switch (a.getKind()) {
- case AffineExprKind::DimId: {
- unsigned idx = a.cast<AffineDimExpr>().getPosition();
- if (b)
- addAffineOrderings(adjM, inDegree, b, AffineExpr(), idx);
- else if (!adjM[fidx][idx]) {
- adjM[fidx][idx] = true;
- inDegree[idx]++;
+ AffineExpr b, Optional<unsigned> fidx,
+ Optional<unsigned> tidx) {
+ if (!a && !b) {
+ // Recursion leaf.
+ assert(fidx && tidx);
+ unsigned f = *fidx, t = *tidx;
+ if (!adjM[f][t]) {
+ adjM[f][t] = true;
+ inDegree[t]++;
}
+ return;
+ }
+ // Picks an affine expression and expands (recurses into) it.
+ auto toExpand = a ? a : b;
+ switch (toExpand.getKind()) {
+ case AffineExprKind::DimId: {
+ auto idx = toExpand.cast<AffineDimExpr>().getPosition();
+ if (toExpand == a)
+ addAffineOrderings(adjM, inDegree, AffineExpr(), b, idx, tidx);
+ else // toExpand == b
+ addAffineOrderings(adjM, inDegree, a, AffineExpr(), fidx, idx);
break;
}
case AffineExprKind::Add:
case AffineExprKind::Mul: {
- auto binOp = a.cast<AffineBinaryOpExpr>();
- addAffineOrderings(adjM, inDegree, binOp.getLHS(), b, fidx);
- addAffineOrderings(adjM, inDegree, binOp.getRHS(), b, fidx);
+ auto binOp = toExpand.cast<AffineBinaryOpExpr>();
+ if (toExpand == a) {
+ addAffineOrderings(adjM, inDegree, binOp.getLHS(), b, fidx, tidx);
+ addAffineOrderings(adjM, inDegree, binOp.getRHS(), b, fidx, tidx);
+ } else {
+ addAffineOrderings(adjM, inDegree, a, binOp.getLHS(), fidx, tidx);
+ addAffineOrderings(adjM, inDegree, a, binOp.getRHS(), fidx, tidx);
+ }
break;
}
default:
@@ -245,11 +295,46 @@ static void addAffineOrderings(std::vector<std::vector<bool>> &adjM,
}
}
-/// Computes a topologically sorted iteration graph for the linalg operation.
-/// Ensures all tensors are visited in natural index order. This is essential
-/// for sparse storage formats since these only support access along fixed
-/// dimensions. Even for dense storage formats, however, the natural index
-/// order yields innermost unit-stride access with better spatial locality.
+static void tryLoosenAffineDenseConstraints(linalg::GenericOp op,
+ Optional<unsigned> &fldx,
+ AffineExpr &fa,
+ Optional<unsigned> &tldx,
+ AffineExpr &ta) {
+ // We use a heuristic here to only pick one dim expression from each
+ // compound affine expression to establish the order between two dense
+ // dimensions.
+ if (!tldx) {
+ AffineDimFinder finder(op);
+ // NOTE: The ordering can only be loosened when the destination level is
+ // dense (when !tldx); e.g., for [dense, sparse] -> (d0 + d1, d2), we still
+ // require both d0 < d2 and d1 < d2 to ensure the correct ordering (i.e.,
+ // no ordering like d0->d2->d1).
+ // TODO: this is obviously a suboptimal solution.
+ if (!fldx && !fa.isa<AffineConstantExpr>()) {
+ // Heuristic: we prefer a parallel loop for the lhs to reduce the chance
+ // of adding a reduction < parallel ordering.
+ finder.setPickedIterType(utils::IteratorType::parallel);
+ finder.walkPostOrder(fa);
+ fa = finder.getDimExpr();
+ fldx = finder.getDimExpr().getPosition();
+ }
+ if (!ta.isa<AffineConstantExpr>()) {
+ // Heuristic: we prefer a reduction loop for the rhs to reduce the chance
+ // of adding a reduction < parallel ordering.
+ finder.setPickedIterType(utils::IteratorType::reduction);
+ finder.walkPostOrder(ta);
+ ta = finder.getDimExpr();
+ tldx = finder.getDimExpr().getPosition();
+ }
+ }
+}
+
+/// Computes a topologically sorted iteration graph for the linalg
+/// operation. Ensures all tensors are visited in natural index order. This
+/// is essential for sparse storage formats since these only support access
+/// along fixed dimensions. Even for dense storage formats, however, the
+/// natural index order yields innermost unit-stride access with better
+/// spatial locality.
static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
std::vector<unsigned> &topSort, unsigned mask,
OpOperand *skip = nullptr) {
@@ -275,10 +360,25 @@ static bool computeIterationGraph(Merger &merger, linalg::GenericOp op,
// by default) puts an ordering constraint on the loop indices. For
// example, the tensor expression A_ijk forces the ordering i < j < k
// on the loop indices if no explicit dimension ordering is given.
- for (unsigned d = 1, rank = map.getNumResults(); d < rank; d++) {
- AffineExpr f = map.getResult(toOrigDim(enc, d - 1));
- AffineExpr t = map.getResult(toOrigDim(enc, d));
- addAffineOrderings(adjM, inDegree, f, t, 0);
+ for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
+ AffineExpr ta = map.getResult(toOrigDim(enc, d));
+ Optional<unsigned> tldx = merger.getLoopIdx(t.getOperandNumber(), d);
+
+ if (d > 0) {
+ AffineExpr fa = map.getResult(toOrigDim(enc, d - 1));
+ Optional<unsigned> fldx =
+ merger.getLoopIdx(t.getOperandNumber(), d - 1);
+
+ // Applying order constraints to every pair of dimExprs between two
+ // compound affine expressions can sometimes be too strict:
+ // e.g., for [dense, dense] -> (d0 + d1, d2 + d3), it is perfectly fine
+ // to have the loop sequence d0->d2->d1->d3 instead of requiring
+ // d0 < d2, d1 < d2, d0 < d3, and d1 < d3.
+ if (!(mask & SortMask::kIncludeDense))
+ tryLoosenAffineDenseConstraints(op, fldx, fa, tldx, ta);
+
+ addAffineOrderings(adjM, inDegree, fa, ta, fldx, tldx);
+ }
}
// Push unrelated loops into sparse iteration space, so these
// will be skipped more often.
@@ -504,10 +604,6 @@ static Value genSubscript(CodeGen &codegen, OpBuilder &builder,
auto enc = getSparseTensorEncoding(t->get().getType());
unsigned rank = map.getNumResults();
if (enc) {
- // Note that currently, all sparse subscripts are simple.
- // TODO: accept affine too?
- assert(map.getResult(toOrigDim(enc, rank - 1)).getKind() ==
- AffineExprKind::DimId);
Value pidx = codegen.loopEmitter.getPidxs()[tensor].back();
assert(pidx);
args.push_back(pidx); // position index
@@ -750,8 +846,11 @@ static bool isInvariantAffine(const CodeGen &codegen, AffineExpr a,
switch (a.getKind()) {
case AffineExprKind::DimId: {
unsigned idx = a.cast<AffineDimExpr>().getPosition();
- if (idx == ldx)
+ if (idx == ldx) {
atLevel = true;
+ // Must be invariant if we are at the level.
+ return true;
+ }
return codegen.getLoopIdxValue(idx) != nullptr; // no longer in play?
}
case AffineExprKind::Add:
@@ -1090,12 +1189,13 @@ static bool startLoopSeq(Merger &merger, CodeGen &codegen, OpBuilder &builder,
return false;
}
-static void translateBitsToTidDimPairs(Merger &merger, CodeGen &codegen,
- unsigned li, unsigned idx,
- SmallVectorImpl<size_t> &condTids,
- SmallVectorImpl<size_t> &condDims,
- SmallVectorImpl<size_t> &extraTids,
- SmallVectorImpl<size_t> &extraDims) {
+static void translateBitsToTidDimPairs(
+ Merger &merger, CodeGen &codegen, linalg::GenericOp op, unsigned li,
+ unsigned idx, SmallVectorImpl<size_t> &condTids,
+ SmallVectorImpl<size_t> &condDims, SmallVectorImpl<size_t> &extraTids,
+ SmallVectorImpl<size_t> &extraDims, SmallVectorImpl<size_t> &affineTids,
+ SmallVectorImpl<size_t> &affineDims, SmallVectorImpl<AffineExpr> &exps) {
+
const BitVector &all = merger.lat(li).bits;
const BitVector &simple = merger.lat(li).simple;
@@ -1123,6 +1223,53 @@ static void translateBitsToTidDimPairs(Merger &merger, CodeGen &codegen,
// TODO: get rid of extraTids and extraDims.
extraTids.push_back(tid);
extraDims.push_back(dim.value());
+ } else {
+ assert(isUndefDLT(dlt));
+ if (tid >= op.getNumDpsInputs())
+ // We only handle affine expressions on input tensors (for now).
+ return;
+ OpOperand *operand = &op->getOpOperand(tid);
+ auto enc = getSparseTensorEncoding(operand->get().getType());
+ // Non-annotated dense tensors require no special handling.
+ if (!enc)
+ return;
+
+ ArrayRef<AffineExpr> affines =
+ op.getMatchingIndexingMap(operand).getResults();
+ assert(affines.size() == enc.getDimLevelType().size());
+ for (unsigned i = 0, e = affines.size(); i < e; i++) {
+ AffineExpr exp = affines[toOrigDim(enc, i)];
+ // Skip simple affine expressions and non-dense dimensions (which have
+ // their own filter loops).
+ if (exp.isa<AffineDimExpr>() || !isDenseDLT(getDimLevelType(enc, i)))
+ continue;
+
+ // Constant affine expressions on a dense level are required to be
+ // generated when
+ // 1. The previous level is an (at-level) invariant compound dense
+ // affine expression (with no corresponding loop idx); or
+ // 2. The previous level is being generated right now.
+ if (exp.isa<AffineConstantExpr>()) {
+ // TODO: Should we come up with a more unified way to handle
+ // constant expressions? We currently require two (somewhat ad-hoc)
+ // pieces of code for it.
+ assert(false && "do not support constant affine");
+ }
+
+ bool atLevel = false;
+ if (isInvariantAffine(codegen, exp, idx, atLevel) && atLevel) {
+ // If the compound affine expression is invariant and we are right at
+ // the level, we need to generate the address according to the affine
+ // expression. This is also the best place to do it, to avoid putting
+ // it inside inner loops.
+ // NOTE: This assumes that the levels of the input tensor are
+ // initialized in order; a more permissive approach might be to accept
+ // out-of-order accesses between consecutive dense levels.
+ affineTids.push_back(tid);
+ affineDims.push_back(i);
+ exps.push_back(exp);
+ }
+ }
}
});
@@ -1146,12 +1293,23 @@ static Operation *startLoop(Merger &merger, CodeGen &codegen,
// The set of (dense) tensors that is optimized from condition, yet still
// need extra locals to iterate on them.
SmallVector<size_t> extraTids, extraDims;
-
- translateBitsToTidDimPairs(merger, codegen, li, codegen.topSort[at], condTids,
- condDims, extraTids, extraDims);
+ // The set of dense tensors with non-trivial affine expressions whose
+ // address just became invariant and should now be generated at the
+ // current level.
+ SmallVector<size_t> affineTids, affineDims;
+ SmallVector<AffineExpr> affines;
+
+ translateBitsToTidDimPairs(merger, codegen, op, li, codegen.topSort[at],
+ condTids, condDims, extraTids, extraDims,
+ affineTids, affineDims, affines);
// Emit the for/while-loop control.
Operation *loop = genLoop(merger, codegen, builder, op, at, needsUniv,
condTids, condDims, extraTids, extraDims);
+
+ for (auto [tid, dim, exp] : llvm::zip(affineTids, affineDims, affines)) {
+ codegen.loopEmitter.genDenseAffineAddressAtCurLevel(builder, op.getLoc(),
+ tid, dim, exp);
+ }
return loop;
}
diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir
index 0c4fe9b14d68d..e312a90c1bcb4 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir
@@ -3,6 +3,7 @@
#SpVec = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
#CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>
+#Row = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>
#trait1 = {
indexing_maps = [
@@ -160,3 +161,67 @@ func.func @mul_affine_dense2d(%arga: tensor<32x16xf64, #CSR>,
} -> tensor<32x16xf64>
return %0 : tensor<32x16xf64>
}
+
+#trait4 = {
+ indexing_maps = [
+ affine_map<(i,j) -> (i+2,j)>, // a
+ affine_map<(i,j) -> (i,j+3)>, // b
+ affine_map<(i,j) -> (i,j)> // x (out)
+ ],
+ iterator_types = ["parallel","parallel"],
+ doc = "x(i,j) += a(i+2,j) * b(i,j+3)"
+}
+
+// CHECK-LABEL: func.func @mul_affine_dense_dim_2d(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<34x16xf64, #sparse_tensor.encoding
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf64>) -> tensor<32x16xf64> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 19 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 3 : index
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 1 : index} : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 0 : index} : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64>
+// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_5]] {
+// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_17]]] : memref<?xindex>
+// CHECK: %[[VAL_19:.*]] = arith.addi %[[VAL_18]], %[[VAL_6]] : index
+// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref<?xindex>
+// CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_19]], %[[VAL_5]] : index
+// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_21]]] : memref<?xindex>
+// CHECK: scf.for %[[VAL_23:.*]] = %[[VAL_20]] to %[[VAL_22]] step %[[VAL_5]] {
+// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_23]]] : memref<?xindex>
+// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_24]], %[[VAL_7]] : index
+// CHECK: %[[VAL_26:.*]] = arith.muli %[[VAL_17]], %[[VAL_3]] : index
+// CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_26]], %[[VAL_25]] : index
+// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_18]], %[[VAL_24]]] : memref<32x16xf64>
+// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref<?xf64>
+// CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_27]]] : memref<?xf64>
+// CHECK: %[[VAL_31:.*]] = arith.mulf %[[VAL_29]], %[[VAL_30]] : f64
+// CHECK: %[[VAL_32:.*]] = arith.addf %[[VAL_28]], %[[VAL_31]] : f64
+// CHECK: memref.store %[[VAL_32]], %[[VAL_14]]{{\[}}%[[VAL_18]], %[[VAL_24]]] : memref<32x16xf64>
+// CHECK: }
+// CHECK: }
+// CHECK: %[[VAL_33:.*]] = bufferization.to_tensor %[[VAL_14]] : memref<32x16xf64>
+// CHECK: return %[[VAL_33]] : tensor<32x16xf64>
+// CHECK: }
+func.func @mul_affine_dense_dim_2d(%arga: tensor<34x16xf64, #CSR>,
+ %argb: tensor<32x19xf64, #Row>,
+ %argx: tensor<32x16xf64>) -> tensor<32x16xf64> {
+ %0 = linalg.generic #trait4
+ ins(%arga, %argb: tensor<34x16xf64, #CSR>, tensor<32x19xf64, #Row>)
+ outs(%argx: tensor<32x16xf64>) {
+ ^bb(%a: f64, %b: f64, %x: f64):
+ %0 = arith.mulf %a, %b : f64
+ %1 = arith.addf %x, %0 : f64
+ linalg.yield %1 : f64
+ } -> tensor<32x16xf64>
+ return %0 : tensor<32x16xf64>
+}