[Mlir-commits] [mlir] a2c4ca5 - Revert "[mlir][sparse] support Parallel for/reduction."
Stella Stamenova
llvmlistbot at llvm.org
Mon Nov 7 08:50:42 PST 2022
Author: Stella Stamenova
Date: 2022-11-07T08:48:52-08:00
New Revision: a2c4ca50caf43a3924a37580451ebe9fa3daa128
URL: https://github.com/llvm/llvm-project/commit/a2c4ca50caf43a3924a37580451ebe9fa3daa128
DIFF: https://github.com/llvm/llvm-project/commit/a2c4ca50caf43a3924a37580451ebe9fa3daa128.diff
LOG: Revert "[mlir][sparse] support Parallel for/reduction."
This reverts commit 838389780e56f1a198a94f66ea436359466bf5ed.
This broke the Windows MLIR buildbot: https://lab.llvm.org/buildbot/#/builders/13/builds/27934
Added:
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
mlir/test/Dialect/SparseTensor/sparse_parallel.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
Removed:
mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index 27b7acbd322dc..032d8026b2668 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -219,12 +219,9 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
OpBuilder &builder, Location loc, size_t tid, size_t dim,
MutableArrayRef<Value> reduc, bool isParallel, ArrayRef<size_t> extraTids,
ArrayRef<size_t> extraDims) {
-
assert(dimTypes[tid].size() > dim);
// We can not re-enter the same level.
assert(!coord[tid][dim]);
- // TODO: support multiple return on parallel for?
- assert(!isParallel || reduc.empty() <= 1);
Value step = constantIndex(builder, loc, 1);
auto dimType = dimTypes[tid][dim];
@@ -235,38 +232,11 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
Value lo = isSparseInput ? pidxs[tid][dim] // current offset
: loopSeqStack.back(); // univeral tid
Value hi = highs[tid][dim];
- Operation *loop = nullptr;
- Value iv;
- if (isParallel) {
- scf::ParallelOp parOp =
- builder.create<scf::ParallelOp>(loc, lo, hi, step, reduc);
- builder.setInsertionPointToStart(parOp.getBody());
- assert(parOp.getNumReductions() == reduc.size());
- iv = parOp.getInductionVars()[0];
-
- // In-place update on the reduction variable vector.
- // Note that the init vals is not the actual reduction variables but instead
- // used as a `special handle` to (temporarily) represent them. The
- // expression on init vals will be moved into scf.reduce and replaced with
- // the block arguments when exiting the loop (see exitForLoop). This is
- // needed as we can not build the actual reduction block and get the actual
- // reduction varaible before users fill parallel loop body.
- for (int i = 0, e = reduc.size(); i < e; i++)
- reduc[i] = parOp.getInitVals()[i];
- loop = parOp;
- } else {
- scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, reduc);
- builder.setInsertionPointToStart(forOp.getBody());
- iv = forOp.getInductionVar();
-
- // In-place update on the reduction variable vector.
- assert(forOp.getNumRegionIterArgs() == reduc.size());
- for (int i = 0, e = reduc.size(); i < e; i++)
- reduc[i] = forOp.getRegionIterArg(i);
- loop = forOp;
- }
- assert(loop && iv);
+ scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, reduc);
+ builder.setInsertionPointToStart(forOp.getBody());
+ Value iv = forOp.getInductionVar();
+ assert(iv);
if (isSparseInput) {
pidxs[tid][dim] = iv;
// Generating a load on the indices array yields the coordinate.
@@ -283,12 +253,16 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
// NOTE: we can also prepares for next dim here in advance
// Push the loop into stack
- loopStack.emplace_back(ArrayRef<size_t>(tid), ArrayRef<size_t>(dim), loop,
+ loopStack.emplace_back(ArrayRef<size_t>(tid), ArrayRef<size_t>(dim), forOp,
coord[tid][dim]);
// Emit extra locals.
emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims);
- return loop;
+ // In-place update on the reduction variable vector.
+ assert(forOp.getNumRegionIterArgs() == reduc.size());
+ for (int i = 0, e = reduc.size(); i < e; i++)
+ reduc[i] = forOp.getRegionIterArg(i);
+ return forOp;
}
Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims(
@@ -460,73 +434,17 @@ void SparseTensorLoopEmitter::emitExtraLocalsForTensorsAtDenseDims(
}
}
-void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc,
- MutableArrayRef<Value> reduc) {
+SmallVector<Value, 2>
+SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc,
+ ArrayRef<Value> reduc) {
LoopLevelInfo &loopInfo = loopStack.back();
auto &dims = loopStack.back().dims;
auto &tids = loopStack.back().tids;
- auto forOp = llvm::dyn_cast<scf::ForOp>(loopInfo.loop);
- if (forOp) {
- if (!reduc.empty()) {
- assert(reduc.size() == forOp.getNumResults());
- rewriter.setInsertionPointToEnd(forOp.getBody());
- rewriter.create<scf::YieldOp>(loc, reduc);
- }
- // Exit the loop.
- rewriter.setInsertionPointAfter(forOp);
- // In-place update reduction variables.
- for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++)
- reduc[i] = forOp.getResult(i);
- } else {
- auto parOp = llvm::cast<scf::ParallelOp>(loopInfo.loop);
- if (!reduc.empty()) {
- assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1);
- Operation *redExp = reduc.front().getDefiningOp();
- // Reduction expression should have no use.
- assert(redExp->getUses().empty());
- // This must be a binary operation.
- // NOTE: This is users' responsibilty to ensure the operation are
- // commutative.
- assert(redExp->getNumOperands() == 2 && redExp->getNumResults() == 1);
-
- Value redVal = parOp.getInitVals().front();
- Value curVal;
- if (redExp->getOperand(0) == redVal)
- curVal = redExp->getOperand(1);
- else if (redExp->getOperand(1) == redVal)
- curVal = redExp->getOperand(0);
- // One of the operands must be the init value (which is also the
- // previous reduction value).
- assert(curVal);
- // The reduction expression should be the only user of the reduction val
- // inside the parallel for.
- unsigned numUsers = 0;
- for (Operation *op : redVal.getUsers()) {
- if (op->getParentOp() == parOp)
- numUsers++;
- }
- assert(numUsers == 1);
- (void)numUsers; // to silence unused variable warning in release build
-
- rewriter.setInsertionPointAfter(redExp);
- auto redOp = rewriter.create<scf::ReduceOp>(loc, curVal);
- // Attach to the reduction op.
- Block *redBlock = &redOp.getRegion().getBlocks().front();
- rewriter.setInsertionPointToEnd(redBlock);
- Operation *newRed = rewriter.clone(*redExp);
- // Replaces arguments of the reduction expression by using the block
- // arguments from scf.reduce.
- rewriter.updateRootInPlace(
- newRed, [&]() { newRed->setOperands(redBlock->getArguments()); });
- // Erases the out-dated reduction expression.
- rewriter.eraseOp(redExp);
- rewriter.setInsertionPointToEnd(redBlock);
- rewriter.create<scf::ReduceReturnOp>(loc, newRed->getResult(0));
- }
- rewriter.setInsertionPointAfter(parOp);
- // In-place update reduction variables.
- for (unsigned i = 0, e = parOp.getResults().size(); i < e; i++)
- reduc[i] = parOp.getResult(i);
+ auto forOp = llvm::cast<scf::ForOp>(loopInfo.loop);
+ if (!reduc.empty()) {
+ assert(reduc.size() == forOp.getNumResults());
+ builder.setInsertionPointToEnd(forOp.getBody());
+ builder.create<scf::YieldOp>(loc, reduc);
}
// Finished iterating a tensor, clean up
@@ -540,10 +458,14 @@ void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc,
if (!isDenseDLT(dimTypes[tid][dim]))
highs[tid][dim] = Value();
}
+ // exit the loop
+ builder.setInsertionPointAfter(forOp);
+ return forOp.getResults();
}
-void SparseTensorLoopEmitter::exitCoIterationLoop(
- OpBuilder &builder, Location loc, MutableArrayRef<Value> reduc) {
+SmallVector<Value, 2>
+SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc,
+ ArrayRef<Value> reduc) {
auto whileOp = llvm::cast<scf::WhileOp>(loopStack.back().loop);
auto &dims = loopStack.back().dims;
auto &tids = loopStack.back().tids;
@@ -577,10 +499,10 @@ void SparseTensorLoopEmitter::exitCoIterationLoop(
}
// Reduction value from users.
- for (unsigned i = 0, e = reduc.size(); i < e; i++) {
- operands.push_back(reduc[i]);
- // In place update reduction variable.
- reduc[i] = whileOp->getResult(o++);
+ SmallVector<Value, 2> ret;
+ for (auto red : reduc) {
+ operands.push_back(red);
+ ret.push_back(whileOp->getResult(o++));
}
// An (optional) universal index.
@@ -595,24 +517,26 @@ void SparseTensorLoopEmitter::exitCoIterationLoop(
assert(o == operands.size());
builder.create<scf::YieldOp>(loc, operands);
builder.setInsertionPointAfter(whileOp);
+ return ret;
}
-void SparseTensorLoopEmitter::exitCurrentLoop(RewriterBase &rewriter,
- Location loc,
- MutableArrayRef<Value> reduc) {
+SmallVector<Value, 2>
+SparseTensorLoopEmitter::exitCurrentLoop(OpBuilder &builder, Location loc,
+ ArrayRef<Value> reduc) {
// Clean up the values, it would help use to discover potential bug at a
// earlier stage (instead of silently using a wrong value).
LoopLevelInfo &loopInfo = loopStack.back();
assert(loopInfo.tids.size() == loopInfo.dims.size());
SmallVector<Value, 2> red;
if (llvm::isa<scf::WhileOp>(loopInfo.loop)) {
- exitCoIterationLoop(rewriter, loc, reduc);
+ red = exitCoiterationLoop(builder, loc, reduc);
} else {
- exitForLoop(rewriter, loc, reduc);
+ red = exitForLoop(builder, loc, reduc);
}
assert(loopStack.size() == loopSeqStack.size());
loopStack.pop_back();
+ return red;
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index a75d3920a4d55..3228eb4c79cb2 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -380,8 +380,8 @@ class SparseTensorLoopEmitter {
ArrayRef<size_t> dims, bool needsUniv, MutableArrayRef<Value> reduc = {},
ArrayRef<size_t> extraTids = {}, ArrayRef<size_t> extraDims = {});
- void exitCurrentLoop(RewriterBase &rewriter, Location loc,
- MutableArrayRef<Value> reduc = {});
+ SmallVector<Value, 2> exitCurrentLoop(OpBuilder &builder, Location loc,
+ ArrayRef<Value> reduc = {});
/// Returns the array of coordinate for all the loop generated till now.
void getCoordinateArray(SmallVectorImpl<Value> &coords) const {
@@ -452,35 +452,17 @@ class SparseTensorLoopEmitter {
ArrayRef<size_t> dims);
/// Exits a for loop, returns the reduction results, e.g.,
- /// For sequential for loops:
/// %ret = for () {
/// ...
- /// %val = addi %args, %c
/// yield %val
/// }
- /// For parallel loops, the following generated code by users:
- /// %ret = parallel () init(%args) {
- /// ...
- /// %val = op %args, %c
- /// }
- /// will be transformed into
- /// %ret = parallel () init(%args) {
- /// ...
- /// scf.reduce(%c) bb0(%0, %1){
- /// %val = op %0, %1
- /// scf.reduce.return %val
- /// }
- /// }
- /// NOTE: only one instruction will be moved into reduce block, transformation
- /// will fail if multiple instructions are used to compute the reduction
- /// value.
- /// Return %ret to user, while %val is provided by users (`reduc`).
- void exitForLoop(RewriterBase &rewriter, Location loc,
- MutableArrayRef<Value> reduc);
+ /// Return %ret to user, while %val is provided by users (`reduc`)
+ SmallVector<Value, 2> exitForLoop(OpBuilder &builder, Location loc,
+ ArrayRef<Value> reduc);
/// Exits a while loop, returns the reduction results.
- void exitCoIterationLoop(OpBuilder &builder, Location loc,
- MutableArrayRef<Value> reduc);
+ SmallVector<Value, 2> exitCoiterationLoop(OpBuilder &builder, Location loc,
+ ArrayRef<Value> reduc);
// Whether the loop emitter needs to treat the last tensor as the output
// tensor.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index 533d31fdb5536..9f01731a34d4c 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -410,34 +410,6 @@ static Value getCustomRedId(Operation *op) {
// Sparse compiler synthesis methods (statements and expressions).
//===----------------------------------------------------------------------===//
-/// Generates loop boundary statements (entering/exiting loops). The function
-/// passes and updates the reduction value.
-static Optional<Operation *> genLoopBoundary(
- CodeGen &codegen, Merger &merger,
- function_ref<Optional<Operation *>(MutableArrayRef<Value> reduc)>
- callback) {
- SmallVector<Value, 4> reduc;
- if (codegen.redVal)
- reduc.push_back(codegen.redVal);
- if (codegen.expValues)
- reduc.push_back(codegen.expCount);
- if (codegen.insChain)
- reduc.push_back(codegen.insChain);
-
- auto r = callback(reduc);
-
- // Callback should do in-place update on reduction value vector.
- unsigned i = 0;
- if (codegen.redVal)
- updateReduc(merger, codegen, reduc[i++]);
- if (codegen.expValues)
- codegen.expCount = reduc[i++];
- if (codegen.insChain)
- codegen.insChain = reduc[i];
-
- return r;
-}
-
/// Local bufferization of all dense and sparse data structures.
static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder,
linalg::GenericOp op) {
@@ -897,25 +869,23 @@ static void genExpansion(Merger &merger, CodeGen &codegen, OpBuilder &builder,
/// Returns parallelization strategy. Any implicit loop in the Linalg
/// operation that is marked "parallel" is a candidate. Whether it is actually
/// converted to a parallel operation depends on the requested strategy.
-static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isSparse) {
+static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isReduction,
+ bool isSparse) {
// Reject parallelization of sparse output.
if (codegen.sparseOut)
return false;
- // Parallel loops on tensor expansion can cause data races.
- if (codegen.expCount)
- return false;
// Inspect strategy.
switch (codegen.options.parallelizationStrategy) {
case SparseParallelizationStrategy::kNone:
return false;
case SparseParallelizationStrategy::kDenseOuterLoop:
- return isOuter && !isSparse;
+ return isOuter && !isSparse && !isReduction;
case SparseParallelizationStrategy::kAnyStorageOuterLoop:
- return isOuter;
+ return isOuter && !isReduction;
case SparseParallelizationStrategy::kDenseAnyLoop:
- return !isSparse;
+ return !isSparse && !isReduction;
case SparseParallelizationStrategy::kAnyStorageAnyLoop:
- return true;
+ return !isReduction;
}
llvm_unreachable("unexpected parallelization strategy");
}
@@ -928,16 +898,33 @@ static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder,
ArrayRef<size_t> extraDims) {
Location loc = op.getLoc();
auto iteratorTypes = op.getIteratorTypesArray();
+ bool isReduction = linalg::isReductionIterator(iteratorTypes[idx]);
bool isSparse = isCompressedDLT(merger.getDimLevelType(tid, idx)) ||
isSingletonDLT(merger.getDimLevelType(tid, idx));
- bool isParallel = isParallelFor(codegen, isOuter, isSparse);
-
- Operation *loop =
- genLoopBoundary(codegen, merger, [&](MutableArrayRef<Value> reduc) {
- return codegen.loopEmitter.enterLoopOverTensorAtDim(
- builder, loc, tid, dim, reduc, isParallel, extraTids, extraDims);
- }).value();
- assert(loop);
+ bool isParallel = isParallelFor(codegen, isOuter, isReduction, isSparse);
+ assert(!isParallel);
+
+ // Emit a sequential for loop.
+ SmallVector<Value, 4> operands;
+ if (codegen.redVal)
+ operands.push_back(codegen.redVal);
+ if (codegen.expValues)
+ operands.push_back(codegen.expCount);
+ if (codegen.insChain)
+ operands.push_back(codegen.insChain);
+
+ Operation *loop = codegen.loopEmitter.enterLoopOverTensorAtDim(
+ builder, loc, tid, dim, operands, isParallel, extraTids, extraDims);
+
+ unsigned o = 0;
+ if (codegen.redVal)
+ updateReduc(merger, codegen, operands[o++]);
+ if (codegen.expValues)
+ codegen.expCount = operands[o++];
+ if (codegen.insChain)
+ codegen.insChain = operands[o++];
+ assert(o == operands.size());
+
return loop;
}
@@ -947,15 +934,29 @@ static Operation *genWhile(Merger &merger, CodeGen &codegen, OpBuilder &builder,
ArrayRef<size_t> condTids, ArrayRef<size_t> condDims,
ArrayRef<size_t> extraTids,
ArrayRef<size_t> extraDims) {
+ SmallVector<Value, 4> operands;
+
+ // Construct the while-loop with a parameter for each index.
+ if (codegen.redVal)
+ operands.push_back(codegen.redVal);
+ if (codegen.expValues)
+ operands.push_back(codegen.expCount);
+ if (codegen.insChain)
+ operands.push_back(codegen.insChain);
+
+ Operation *loop = codegen.loopEmitter.enterCoIterationOverTensorsAtDims(
+ builder, op.getLoc(), condTids, condDims, needsUniv, operands, extraTids,
+ extraDims);
+
+ unsigned o = 0;
+ if (codegen.redVal)
+ updateReduc(merger, codegen, operands[o++]);
+ if (codegen.expValues)
+ codegen.expCount = operands[o++];
+ if (codegen.insChain)
+ codegen.insChain = operands[o++];
+ assert(o == operands.size());
- Operation *loop =
- genLoopBoundary(codegen, merger, [&](MutableArrayRef<Value> reduc) {
- // Construct the while-loop with a parameter for each index.
- return codegen.loopEmitter.enterCoIterationOverTensorsAtDims(
- builder, op.getLoc(), condTids, condDims, needsUniv, reduc,
- extraTids, extraDims);
- }).value();
- assert(loop);
return loop;
}
@@ -1185,21 +1186,37 @@ static Operation *startLoop(Merger &merger, CodeGen &codegen,
}
/// Ends a single loop in current sequence. Returns new values for needsUniv.
-static bool endLoop(Merger &merger, CodeGen &codegen, RewriterBase &rewriter,
+static bool endLoop(Merger &merger, CodeGen &codegen, OpBuilder &builder,
linalg::GenericOp op, Operation *loop, unsigned idx,
unsigned li, bool needsUniv) {
// End a while-loop.
if (auto whileOp = dyn_cast<scf::WhileOp>(loop)) {
- finalizeWhileOp(merger, codegen, rewriter, op, idx, needsUniv,
+ finalizeWhileOp(merger, codegen, builder, op, idx, needsUniv,
merger.lat(li).bits, whileOp);
} else {
needsUniv = false;
}
- genLoopBoundary(codegen, merger, [&](MutableArrayRef<Value> reduc) {
- codegen.loopEmitter.exitCurrentLoop(rewriter, op.getLoc(), reduc);
- return llvm::None;
- });
+ SmallVector<Value, 2> reduc;
+ if (codegen.redVal)
+ reduc.push_back(codegen.redVal);
+ if (codegen.expValues)
+ reduc.push_back(codegen.expCount);
+ if (codegen.insChain)
+ reduc.push_back(codegen.insChain);
+
+ auto loopRet =
+ codegen.loopEmitter.exitCurrentLoop(builder, op.getLoc(), reduc);
+ assert(reduc.size() == loopRet.size());
+
+ unsigned o = 0;
+ if (codegen.redVal)
+ updateReduc(merger, codegen, loopRet[o++]);
+ if (codegen.expValues)
+ codegen.expCount = loopRet[o++];
+ if (codegen.insChain)
+ codegen.insChain = loopRet[o++];
+ assert(o == loopRet.size());
return needsUniv;
}
diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir
index f38865c5e2a4f..38766b08ccab8 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir
@@ -1,13 +1,14 @@
// RUN: mlir-opt %s -sparsification="parallelization-strategy=none" | \
// RUN: FileCheck %s --check-prefix=CHECK-PAR0
-// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \
-// RUN: FileCheck %s --check-prefix=CHECK-PAR1
-// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \
-// RUN: FileCheck %s --check-prefix=CHECK-PAR2
-// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \
-// RUN: FileCheck %s --check-prefix=CHECK-PAR3
-// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \
-// RUN: FileCheck %s --check-prefix=CHECK-PAR4
+// FIXME: we do not support vectorization/parallel loops in loop emitter right now
+// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \
+// R_U_N: FileCheck %s --check-prefix=CHECK-PAR1
+// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \
+// R_U_N: FileCheck %s --check-prefix=CHECK-PAR2
+// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \
+// R_U_N: FileCheck %s --check-prefix=CHECK-PAR3
+// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \
+// R_U_N: FileCheck %s --check-prefix=CHECK-PAR4
#DenseMatrix = #sparse_tensor.encoding<{
dimLevelType = [ "dense", "dense" ]
@@ -150,8 +151,7 @@ func.func @scale_ss(%scale: f32,
//
// CHECK-PAR4-LABEL: func @matvec
// CHECK-PAR4: scf.parallel
-// CHECK-PAR4: scf.parallel
-// CHECK-PAR4: scf.reduce
+// CHECK-PAR4: scf.for
// CHECK-PAR4: return
//
func.func @matvec(%arga: tensor<16x32xf32, #CSR>,
diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir
deleted file mode 100644
index 8ba66d2c92ae1..0000000000000
--- a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir
+++ /dev/null
@@ -1,63 +0,0 @@
-// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \
-// RUN: FileCheck %s
-
-#CSR = #sparse_tensor.encoding<{
- dimLevelType = [ "dense", "compressed" ]
-}>
-
-#trait_matvec = {
- indexing_maps = [
- affine_map<(i,j) -> (i,j)>, // A
- affine_map<(i,j) -> (j)>, // b
- affine_map<(i,j) -> (i)> // x (out)
- ],
- iterator_types = ["parallel", "reduction"],
- doc = "x(i) += A(i,j) * b(j)"
-}
-// CHECK-LABEL: func.func @matvec(
-// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>,
-// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<32xf32>,
-// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<16xf32>) -> tensor<16xf32> {
-// CHECK-DAG: %[[TMP_c16:.*]] = arith.constant 16 : index
-// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index
-// CHECK: %[[TMP_0:.*]] = sparse_tensor.pointers %[[TMP_arg0]] {dimension = 1 : index}
-// CHECK: %[[TMP_1:.*]] = sparse_tensor.indices %[[TMP_arg0]] {dimension = 1 : index}
-// CHECK: %[[TMP_2:.*]] = sparse_tensor.values %[[TMP_arg0]]
-// CHECK: %[[TMP_3:.*]] = bufferization.to_memref %[[TMP_arg1]] : memref<32xf32>
-// CHECK: %[[TMP_4:.*]] = bufferization.to_memref %[[TMP_arg2]] : memref<16xf32>
-// CHECK: scf.parallel (%[[TMP_arg3:.*]]) = (%[[TMP_c0]]) to (%[[TMP_c16]]) step (%[[TMP_c1]]) {
-// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32>
-// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_0]][%[[TMP_arg3]]] : memref<?xindex>
-// CHECK: %[[TMP_8:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index
-// CHECK: %[[TMP_9:.*]] = memref.load %[[TMP_0]][%[[TMP_8]]] : memref<?xindex>
-// CHECK: %[[TMP_10:.*]] = scf.parallel (%[[TMP_arg4:.*]]) = (%[[TMP_7]]) to (%[[TMP_9]]) step (%[[TMP_c1]]) init (%[[TMP_6]]) -> f32 {
-// CHECK: %[[TMP_11:.*]] = memref.load %[[TMP_1]][%[[TMP_arg4]]] : memref<?xindex>
-// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_2]][%[[TMP_arg4]]] : memref<?xf32>
-// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_3]][%[[TMP_11]]] : memref<32xf32>
-// CHECK: %[[TMP_14:.*]] = arith.mulf %[[TMP_12]], %[[TMP_13]] : f32
-// CHECK: scf.reduce(%[[TMP_14]]) : f32 {
-// CHECK: ^bb0(%[[TMP_arg5:.*]]: f32, %[[TMP_arg6:.*]]: f32):
-// CHECK: %[[TMP_15:.*]] = arith.addf %[[TMP_arg5]], %[[TMP_arg6]] : f32
-// CHECK: scf.reduce.return %[[TMP_15]] : f32
-// CHECK: }
-// CHECK: scf.yield
-// CHECK: }
-// CHECK: memref.store %[[TMP_10]], %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32>
-// CHECK: scf.yield
-// CHECK: }
-// CHECK: %[[TMP_5:.*]] = bufferization.to_tensor %[[TMP_4]] : memref<16xf32>
-// CHECK: return %[[TMP_5]] : tensor<16xf32>
-func.func @matvec(%arga: tensor<16x32xf32, #CSR>,
- %argb: tensor<32xf32>,
- %argx: tensor<16xf32>) -> tensor<16xf32> {
- %0 = linalg.generic #trait_matvec
- ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>)
- outs(%argx: tensor<16xf32>) {
- ^bb(%A: f32, %b: f32, %x: f32):
- %0 = arith.mulf %A, %b : f32
- %1 = arith.addf %0, %x : f32
- linalg.yield %1 : f32
- } -> tensor<16xf32>
- return %0 : tensor<16xf32>
-}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
index 459b0e13667f6..c12d2b9b913e4 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
@@ -2,14 +2,6 @@
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
-//
-// Do the same run, but now with parallelization.
-//
-// RUN: mlir-opt %s --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \
-// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
-// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
-// RUN: FileCheck %s
-
#CSR = #sparse_tensor.encoding<{
dimLevelType = [ "dense", "compressed" ],
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
index adc0b261f04d3..59e7f33c22c88 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
@@ -4,16 +4,6 @@
// RUN: -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
-//
-// Do the same run, but now with parallelization.
-//
-// RUN: mlir-opt %s \
-// RUN: --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \
-// RUN: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \
-// RUN: mlir-cpu-runner \
-// RUN: -e entry -entry-point-result=void \
-// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
-// RUN: FileCheck %s
!Filename = !llvm.ptr<i8>
More information about the Mlir-commits
mailing list