[Mlir-commits] [mlir] [mlir][sparse] initialize slice-driven loop-related fields in one place (PR #76099)
Yinying Li
llvmlistbot at llvm.org
Wed Dec 20 13:41:38 PST 2023
================
@@ -487,35 +492,70 @@ void LoopEmitter::initializeLoopEmit(
// hoist the code outside if-conditions.
}
- Type indexType = builder.getIndexType();
- Value c0 = constantZero(builder, loc, indexType);
+ initSliceDriven(builder, loc);
+}
+
+void LoopEmitter::initSliceDriven(OpBuilder &builder, Location loc) {
+ Value c0 = C_IDX(0);
for (TensorId t = 0, e = tensors.size(); t < e; t++) {
auto rtp = dyn_cast<RankedTensorType>(tensors[t].getType());
if (!rtp)
continue;
Level lvlRank = SparseTensorType(rtp).getLvlRank();
+
+ // Compute the dependency reduction order.
+ auto remDepStack = dependentLvlMap;
+ std::vector<std::tuple<LoopId, TensorId, Level>> depRedOrder;
for (Level lvl = 0; lvl < lvlRank; lvl++) {
- if (!dependentLvlMap[t][lvl].empty()) {
- ArrayRef<std::pair<TensorLevel, unsigned>> depLvls =
- dependentLvlMap[t][lvl];
- // Needs at least two operands to form a non-trivial affine expression.
- assert(depLvls.size() == sliceMeta[t][lvl].size());
-
- Value size = c0;
- for (int e = depLvls.size() - 1; e >= 0; e--) {
- auto [dt, dl] = unpackTensorLevel(depLvls[e].first);
- unsigned stride = depLvls[e].second;
- Value stridedSize = lvlSizes[dt][dl];
- if (stride != 1)
- stridedSize = MULI(stridedSize, C_IDX(stride));
- size = ADDI(size, stridedSize);
- sliceMeta[t][lvl][e] = std::make_pair(size, stride);
- }
+ // Reverse queue into a stack.
+ std::reverse(remDepStack[t][lvl].begin(), remDepStack[t][lvl].end());
+ for (auto [loop, coeff] : dependentLvlMap[t][lvl])
+ depRedOrder.emplace_back(std::make_tuple(loop, t, lvl));
+ }
+
+ if (depRedOrder.empty())
+ continue;
+ std::sort(depRedOrder.begin(), depRedOrder.end(),
+ [](auto &l, auto &r) { return std::get<0>(l) < std::get<0>(r); });
+
+ for (auto [loop, t, lvl] : depRedOrder) {
+ std::pair<LoopId, unsigned> curDep = remDepStack[t][lvl].back();
+ assert(curDep.first == loop);
+ Value size = c0;
+ for (auto [loop, stride] : remDepStack[t][lvl]) {
+ // The synthetic tensor high defines the loop upper bound.
+ Value loopHi = highs[getSynTensorId()][loop];
+ size = ADDI(size, MULI(loopHi, C_IDX(stride)));
}
+ sliceMeta[t][lvl].emplace_back(size, curDep.second);
+ remDepStack[t][lvl].pop_back();
+
+      // Generate the caches required to quickly compute the next non-empty
+      // slices with increasing offsets for slice-based loops.
+      // We do not need caches for dense levels.
+ if (!remDepStack[t][lvl].empty() && !isDenseLT(lvls[t][lvl]->getLT())) {
+ Value cnt = C_IDX(1);
+ for (int preLvl = lvl - 1; preLvl >= 0; preLvl--) {
+ if (remDepStack[t][preLvl].empty())
+ break;
+ assert(remDepStack[t][preLvl].size() == 1 && "Not implemented");
+ auto [loop, stride] = remDepStack[t][preLvl].back();
+ assert(stride == 1 && "Not yet implemented");
+        // Accumulates the size required to cache the pLo for the slice.
+ // E.g., if we want to cache the pIdx for slice<d0xd1xf64> on the
+ // second level. We at most need to a memref<d0xindex>.
----------------
yinying-lisa-li wrote:
nit: extra `to` or need a verb.
https://github.com/llvm/llvm-project/pull/76099
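As a rough mental model for the hunk above, here is a minimal standalone sketch (not part of the patch; the sample data is hypothetical and plain integers stand in for the MLIR Values built with ADDI/MULI/C_IDX): the dependent levels are gathered as (loop, tensor, level) tuples and sorted by loop id, and each level's slice size is then accumulated as the sum of loopHi * stride over its remaining (loop, stride) dependencies.

```cpp
// Illustrative sketch only, not from the patch.
#include <algorithm>
#include <cstdint>
#include <tuple>
#include <utility>
#include <vector>

using LoopId = unsigned;
using TensorId = unsigned;
using Level = uint64_t;

int main() {
  // Hypothetical dependent levels for one tensor: (loop, tensor, level).
  std::vector<std::tuple<LoopId, TensorId, Level>> depRedOrder = {
      {1, 0, 1}, {0, 0, 0}};

  // Step 1: reduce dependencies in loop order (the std::sort in the diff).
  std::sort(depRedOrder.begin(), depRedOrder.end(),
            [](auto &l, auto &r) { return std::get<0>(l) < std::get<0>(r); });

  // Step 2: a level's slice size is the sum of loopHi * stride over its
  // remaining (loop, stride) dependencies (loopHi mimics the synthetic
  // tensor's highs[...][loop], i.e., the loop upper bound).
  std::vector<std::pair<LoopId, unsigned>> remDeps = {{0, 1}, {1, 2}};
  std::vector<uint64_t> loopHi = {8, 4};
  uint64_t size = 0;
  for (auto [loop, stride] : remDeps)
    size += loopHi[loop] * stride;
  // size == 8 * 1 + 4 * 2 == 16.
  return size == 16 ? 0 : 1;
}
```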