[Mlir-commits] [mlir] 65aaabc - [mlir][sparse] use macro to build common operations in LoopEmitter
Peiming Liu
llvmlistbot at llvm.org
Mon Apr 17 09:57:55 PDT 2023
Author: Peiming Liu
Date: 2023-04-17T16:57:50Z
New Revision: 65aaabc8d1079a3ada643350842336dac39126db
URL: https://github.com/llvm/llvm-project/commit/65aaabc8d1079a3ada643350842336dac39126db
DIFF: https://github.com/llvm/llvm-project/commit/65aaabc8d1079a3ada643350842336dac39126db.diff
LOG: [mlir][sparse] use macro to build common operations in LoopEmitter
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D147553
Added:
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
Removed:
################################################################################
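At its core, the patch replaces verbose builder.create<...> calls with file-local macros that pick up the builder and loc variables from whatever scope they expand in. A minimal standalone sketch of the pattern follows, with a hypothetical helper that is not part of the patch:

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
using namespace mlir;

// Every expansion site must have `builder` and `loc` in scope; the macro
// arguments are parenthesized so composite expressions expand safely.
#define ADDI(lhs, rhs) (builder.create<arith::AddIOp>(loc, (lhs), (rhs)))
#define MULI(lhs, rhs) (builder.create<arith::MulIOp>(loc, (lhs), (rhs)))

// Hypothetical helper: emits IR computing base + i * stride.
static Value linearizeExample(OpBuilder &builder, Location loc, Value base,
                              Value i, Value stride) {
  return ADDI(base, MULI(i, stride)); // two builder.create calls, one line
}

#undef ADDI
#undef MULI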
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
index c1608530b7b1..d0318681bcb1 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
@@ -22,16 +22,24 @@ using namespace mlir;
using namespace mlir::sparse_tensor;
//===----------------------------------------------------------------------===//
-// File local helper functions.
+// File local shorthand macros.
//===----------------------------------------------------------------------===//
#define CMPI(p, l, r) \
- (builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::p, l, r) \
+ (builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::p, (l), (r)) \
.getResult())
-#define ADDI(lhs, rhs) (builder.create<arith::AddIOp>(loc, lhs, rhs))
+#define C_IDX(v) (constantIndex(builder, loc, (v)))
+#define YIELD(vs) (builder.create<scf::YieldOp>(loc, (vs)))
+#define ADDI(lhs, rhs) (builder.create<arith::AddIOp>(loc, (lhs), (rhs)))
+#define ANDI(lhs, rhs) (builder.create<arith::AndIOp>(loc, (lhs), (rhs)))
+#define SUBI(lhs, rhs) (builder.create<arith::SubIOp>(loc, (lhs), (rhs)))
+#define MULI(lhs, rhs) (builder.create<arith::MulIOp>(loc, (lhs), (rhs)))
+#define SELECT(c, l, r) (builder.create<arith::SelectOp>(loc, (c), (l), (r)))
-#define C_IDX(v) (constantIndex(builder, loc, v))
+//===----------------------------------------------------------------------===//
+// File local helper functions.
+//===----------------------------------------------------------------------===//
/// Generates a pointer/index load from the sparse storage scheme. Narrower
/// data types need to be zero extended before casting the value into the
@@ -73,9 +81,7 @@ static Value genSliceStride(OpBuilder &builder, Location loc, Value tensor,
static Value toSliceCrd(OpBuilder &builder, Location loc, Value crd,
Value offset, Value stride, Value tensor, Level lvl) {
// tensorCrd = sliceCrd * stride + offset
- crd = builder.create<arith::MulIOp>(loc, crd, stride);
- crd = builder.create<arith::AddIOp>(loc, crd, offset);
- return crd;
+ return ADDI(MULI(crd, stride), offset);
}
/// Generates code to compute the *absolute* offset of the slice based on the
@@ -92,11 +98,11 @@ static Value toSliceCrd(OpBuilder &builder, Location loc, Value crd,
static Value offsetFromMinCoord(OpBuilder &builder, Location loc, Value minCrd,
Value size, Value isNonEmpty) {
Value geSize = CMPI(uge, minCrd, size);
- Value pred = builder.create<arith::AndIOp>(loc, isNonEmpty, geSize);
- Value mp1 = builder.create<arith::AddIOp>(loc, minCrd, C_IDX(1));
- Value mms = builder.create<arith::SubIOp>(loc, mp1, size);
+ Value pred = ANDI(isNonEmpty, geSize);
+ // Computes minCrd - size + 1
+ Value mms = SUBI(ADDI(minCrd, C_IDX(1)), size);
// This is the absolute offset relative to the underlying tensor.
- return builder.create<arith::SelectOp>(loc, pred, mms, C_IDX(0));
+ return SELECT(pred, mms, C_IDX(0));
}
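Plugging hypothetical numbers into offsetFromMinCoord, say a slice size of 4 and a non-empty level whose minimum coordinate is 6:

// geSize = (6 >= 4) = true, so pred = isNonEmpty && true = true
// mms    = (6 + 1) - 4 = 3
// offset = select(pred, 3, 0) = 3  // minCrd lands at in-slice index 3
// When minCrd < size the offset stays 0 and the slice starts at the origin.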
/// Converts a coordinate relative to the underlying tensor to the coordinate
@@ -108,7 +114,7 @@ static std::pair<Value, Value> fromSliceCrd(OpBuilder &builder, Location loc,
Value stride, Value tensor,
Level lvl) {
// sliceCrd = (tensorCrd - offset) / stride
- crd = builder.create<arith::SubIOp>(loc, crd, offset);
+ crd = SUBI(crd, offset);
Value rem = builder.create<arith::RemUIOp>(loc, crd, stride);
crd = builder.create<arith::DivUIOp>(loc, crd, stride);
return std::make_pair(crd, rem);
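The two conversions are inverses on coordinates that actually lie on the slice; a quick worked example with a hypothetical stride of 2 and offset of 3:

// toSliceCrd:   sliceCrd 2  =>  tensorCrd = 2 * 2 + 3 = 7
// fromSliceCrd: tensorCrd 7 =>  crd = (7 - 3) / 2 = 2,  rem = (7 - 3) % 2 = 0
// fromSliceCrd: tensorCrd 8 =>  crd = 2, rem = 1; a nonzero remainder
// signals a coordinate the stride skips over, which is why the remainder
// is returned alongside the slice coordinate.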
@@ -149,7 +155,7 @@ LoopEmitter::genSliceLegitPredicate(OpBuilder &builder, Location loc, Value crd,
// Must meet all condition to be a valid coordinate in slice.
auto pred = conds.front();
for (auto cond : ValueRange(conds).drop_front())
- pred = builder.create<arith::AndIOp>(loc, pred, cond);
+ pred = ANDI(pred, cond);
return {newCrd, pred};
}
@@ -161,11 +167,11 @@ LoopEmitter::genSliceLegitPredicate(OpBuilder &builder, Location loc, Value crd,
Value LoopEmitter::genAddress(OpBuilder &builder, Location loc, TensorId tid,
Level lvl, Value crd) {
Value pos = lvl == 0 ? C_IDX(0) : posits[tid][lvl - 1];
- Value mul = builder.create<arith::MulIOp>(loc, highs[tid][lvl], pos);
+ Value mul = MULI(highs[tid][lvl], pos);
if (isSparseSlices[tid])
crd = toSliceCrd(builder, loc, crd, sliceOffsets[tid][lvl],
sliceStrides[tid][lvl], tensors[tid], lvl);
- Value add = builder.create<arith::AddIOp>(loc, mul, crd);
+ Value add = ADDI(mul, crd);
return add;
}
@@ -192,19 +198,19 @@ Value LoopEmitter::genSegmentHigh(OpBuilder &builder, Location loc,
Value crd = genIndexLoad(builder, loc, coordinates, pos);
Value isSameCrd = builder.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::eq, crd, sameCrd);
- builder.create<scf::YieldOp>(loc, isSameCrd);
+ YIELD(isSameCrd);
// Else, the position is out of bounds; yield false to terminate the
// loop.
builder.setInsertionPointToStart(ifInBound.elseBlock());
- builder.create<scf::YieldOp>(loc, constantI1(builder, loc, false));
+ YIELD(constantI1(builder, loc, false));
}
builder.create<scf::ConditionOp>(loc, ifInBound.getResults()[0], ivs);
},
/*afterBuilder=*/
[](OpBuilder &builder, Location loc, ValueRange ivs) {
// pos ++
- Value nextPos = builder.create<arith::AddIOp>(loc, ivs[0], C_IDX(1));
- builder.create<scf::YieldOp>(loc, nextPos);
+ Value nextPos = ADDI(ivs[0], C_IDX(1));
+ YIELD(nextPos);
});
// Return the segment high.
return whileOp.getResult(0);
@@ -223,10 +229,9 @@ Value LoopEmitter::genSparseCrd(OpBuilder &builder, Location loc, TensorId tid,
const Value pos = posits[tid][dstLvl];
const Value off = genIndexLoad(builder, loc, mem, pos);
// Linearizes the coordinates within the same collapse reassociation.
- crd = builder.create<arith::AddIOp>(loc, crd, off);
+ crd = ADDI(crd, off);
if (i != reassocSize - 1) {
- crd = builder.create<arith::MulIOp>(loc, crd,
- this->lvlSizes[tid][reassoc[i + 1]]);
+ crd = MULI(crd, this->lvlSizes[tid][reassoc[i + 1]]);
}
}
return crd;
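Assuming the running crd starts at zero, the loop above is ordinary row-major linearization across the collapsed levels; with hypothetical level sizes 4 x 5 and per-level offsets 2 and 3:

// i = 0: crd = ADDI(0, 2)  => 2,  then MULI(2, 5) => 10  (not the last level)
// i = 1: crd = ADDI(10, 3) => 13  (last level, so no trailing multiply)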
@@ -460,7 +465,7 @@ void LoopEmitter::initializeLoopEmit(OpBuilder &builder, Location loc,
Value size = c0;
for (unsigned e = depLvls.size() - 1; e >= 1; e--) {
auto [dt, dd] = depLvls[e];
- size = builder.create<arith::AddIOp>(loc, size, lvlSizes[dt][dd]);
+ size = ADDI(size, lvlSizes[dt][dd]);
sliceSizes[t][lvl][e - 1] = size;
}
}
@@ -509,8 +514,8 @@ void LoopEmitter::exitCurrentLoopSeq(OpBuilder &builder, Location loc) {
// the previous level by two to skip the [pLo, pHi] for current level.
Value sPtrBuf = slicePosBuffer[tid][lvl].back();
Value curP = genIndexLoad(builder, loc, sPtrBuf, c1);
- Value nexP = builder.create<arith::AddIOp>(loc, curP, c2);
// TODO: we could probably use an SSA value for it.
+ Value nexP = ADDI(curP, c2);
builder.create<memref::StoreOp>(loc, nexP, sPtrBuf, c1);
}
}
@@ -531,15 +536,13 @@ Value LoopEmitter::genAffine(OpBuilder &builder, Location loc, AffineExpr a) {
}
case AffineExprKind::Add: {
auto binOp = a.cast<AffineBinaryOpExpr>();
- return builder.create<arith::AddIOp>(
- loc, genAffine(builder, loc, binOp.getLHS()),
- genAffine(builder, loc, binOp.getRHS()));
+ return ADDI(genAffine(builder, loc, binOp.getLHS()),
+ genAffine(builder, loc, binOp.getRHS()));
}
case AffineExprKind::Mul: {
auto binOp = a.cast<AffineBinaryOpExpr>();
- return builder.create<arith::MulIOp>(
- loc, genAffine(builder, loc, binOp.getLHS()),
- genAffine(builder, loc, binOp.getRHS()));
+ return MULI(genAffine(builder, loc, binOp.getLHS()),
+ genAffine(builder, loc, binOp.getRHS()));
}
case AffineExprKind::Constant: {
int64_t c = a.cast<AffineConstantExpr>().getValue();
@@ -627,10 +630,10 @@ Operation *LoopEmitter::emitForLoopOverTensorAtLvl(
// else
// yield a
// yield %s
- builder.create<scf::YieldOp>(loc, ifOp.getResults());
+ YIELD(ifOp.getResults());
builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
// On mismatch.
- builder.create<scf::YieldOp>(loc, reduc);
+ YIELD(reduc);
}
// Set the insertion point to matched branch.
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
@@ -764,8 +767,8 @@ Operation *LoopEmitter::enterLoopOverTensorAtLvl(
condTid.push_back(tid);
condLvl.push_back(lvl);
} else {
- hi = builder.create<arith::SubIOp>(loc, lvlSizes[tid][lvl], sliceSz);
- hi = builder.create<arith::AddIOp>(loc, hi, C_IDX(1));
+ hi = SUBI(lvlSizes[tid][lvl], sliceSz);
+ hi = ADDI(hi, C_IDX(1));
}
} else {
condTid.push_back(tid);
@@ -784,9 +787,7 @@ Operation *LoopEmitter::enterLoopOverTensorAtLvl(
bool fullyReduc = depFullyReduced(t, l);
SliceInfo &info = sliceStack[t].back();
if (fullyReduc) {
- posits[t][l] =
- genAddress(builder, loc, t, l,
- builder.create<arith::AddIOp>(loc, info.offset, iv));
+ posits[t][l] = genAddress(builder, loc, t, l, ADDI(info.offset, iv));
} else {
// Puts sliced dense loop into LoopInfo so that LoopEmitter knows how to
// exit it.
@@ -860,10 +861,10 @@ Operation *LoopEmitter::enterFilterLoopOverTensorAtLvl(
// else
// yield a
// yield %s
- builder.create<scf::YieldOp>(loc, ifOp.getResults());
+ YIELD(ifOp.getResults());
builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
// On mismatch.
- builder.create<scf::YieldOp>(loc, reduc);
+ YIELD(reduc);
}
// Set the insert point to matched branch.
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
@@ -953,7 +954,7 @@ Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
// We used the first level bound as the bound for the collapsed set of levels.
Value op2 = highs[tid][reassoc.front()];
Value opc = CMPI(ult, op1, op2);
- cond = cond ? builder.create<arith::AndIOp>(loc, cond, opc) : opc;
+ cond = cond ? ANDI(cond, opc) : opc;
// Update positions
Value pos = after->getArgument(o++);
// For COO, the position is the same across consecutive levels.
@@ -996,23 +997,22 @@ Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
//
// This "idx" is the index into `llvm::zip(tids, lvls)`
for (auto [pred, idx] : slicesPreds) {
- Value nextPos = builder.create<arith::AddIOp>(loc, yields[idx], C_IDX(1));
- yields[idx] =
- builder.create<arith::SelectOp>(loc, pred, yields[idx], nextPos);
+ Value nextPos = ADDI(yields[idx], C_IDX(1));
+ yields[idx] = SELECT(pred, yields[idx], nextPos);
}
Value pred = slicesPreds.front().first;
for (int i = 1, e = slicesPreds.size(); i < e; i++) {
- pred = builder.create<arith::AndIOp>(loc, pred, slicesPreds[i].first);
+ pred = ANDI(pred, slicesPreds[i].first);
}
auto ifOp = builder.create<scf::IfOp>(loc, types, pred, /*else*/ true);
ifOp->setAttr(getLoopEmitterLoopAttrName(),
StringAttr::get(builder.getContext(), "slice"));
- builder.create<scf::YieldOp>(loc, ifOp->getResults());
+ YIELD(ifOp->getResults());
assert(types.size() == yields.size());
// If not all slices are legit
builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
- builder.create<scf::YieldOp>(loc, yields);
+ YIELD(yields);
// If all slices are legit, start the user-generated code.
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
@@ -1026,8 +1026,8 @@ Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
if (isCompressedDLT(lvlTp) || isSingletonDLT(lvlTp)) {
const auto crd = coords[tid][lvl];
if (min) {
- Value cmp = CMPI(ult, crd, min);
- min = builder.create<arith::SelectOp>(loc, cmp, crd, min);
+ Value cmp = CMPI(ult, coords[tid][lvl], min);
+ min = SELECT(cmp, coords[tid][lvl], min);
} else {
min = crd;
}
@@ -1075,12 +1075,11 @@ Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
{
OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPointToStart(ifNewSegHi.thenBlock());
- builder.create<scf::YieldOp>(loc,
- genSegmentHigh(builder, loc, tid, srcLvl,
- pos, highs[tid][srcLvl]));
+ YIELD(genSegmentHigh(builder, loc, tid, srcLvl, pos,
+ highs[tid][srcLvl]));
// Else, reuses the same segment high.
builder.setInsertionPointToStart(ifNewSegHi.elseBlock());
- builder.create<scf::YieldOp>(loc, oldSegHi);
+ YIELD(oldSegHi);
}
highs[tid][srcLvl + 1] = segHi[tid][srcLvl] = ifNewSegHi.getResult(0);
}
@@ -1126,7 +1125,7 @@ void LoopEmitter::prepareLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
const Value pLo = srcLvl == 0 ? c0 : posits[tid][srcLvl - 1];
posits[tid][srcLvl] = genIndexLoad(builder, loc, mem, pLo);
- const Value pHi = builder.create<arith::AddIOp>(loc, pLo, c1);
+ const Value pHi = ADDI(pLo, c1);
highs[tid][srcLvl] = genIndexLoad(builder, loc, mem, pHi);
return;
}
@@ -1144,7 +1143,7 @@ void LoopEmitter::prepareLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
highs[tid][srcLvl] =
(!isUniqueDLT(lvlTypes[tid][srcLvl - 1]) && parentSegHi)
? parentSegHi
- : builder.create<arith::AddIOp>(loc, pLo, c1);
+ : ADDI(pLo, c1);
return;
}
}
@@ -1340,9 +1339,9 @@ void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc,
// segment high.
Value add = !isUniqueDLT(lvlTypes[tid][reassoc.back()])
? segHi[tid][reassoc.back()]
- : builder.create<arith::AddIOp>(loc, pos, one);
+ : ADDI(pos, one);
- operands.push_back(builder.create<arith::SelectOp>(loc, cmp, add, pos));
+ operands.push_back(SELECT(cmp, add, pos));
// Following loops continue iteration from the break point of the
// current while loop.
const Value newPos = whileOp->getResult(o++);
@@ -1372,13 +1371,13 @@ void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc,
if (operands.size() + delta < whileOp.getNumResults()) {
assert(operands.size() + delta + 1 == whileOp.getNumResults());
// The last one is the universal index.
- operands.push_back(builder.create<arith::AddIOp>(loc, iv, one));
+ operands.push_back(ADDI(iv, one));
// Update the loop starting point of the current loop sequence.
loopSeqStack.back().first = whileOp->getResult(o++);
}
assert(o == operands.size() + delta);
- builder.create<scf::YieldOp>(loc, operands);
+ YIELD(operands);
builder.setInsertionPointAfter(whileOp);
}
@@ -1439,7 +1438,7 @@ std::pair<Operation *, ValueRange> LoopEmitter::genSliceLvlTraverseLoop(
Value size, TensorId tid, Level lvl, ValueRange userReduc, bool genYield,
LoopBodyBuilder bodyBuilder) {
Value c1 = C_IDX(1);
- Value sliceHi = builder.create<arith::AddIOp>(loc, offset, size);
+ Value sliceHi = ADDI(offset, sliceSizes[tid][lvl].back());
SmallVector<Value> reduc = {
loopLo, // loop lower bounds
@@ -1454,7 +1453,7 @@ std::pair<Operation *, ValueRange> LoopEmitter::genSliceLvlTraverseLoop(
Value lo = args[0];
Value cont = args[1];
Value inBound = CMPI(ult, lo, loopHi);
- Value cond = builder.create<arith::AndIOp>(loc, cont, inBound);
+ Value cond = ANDI(cont, inBound);
// Continue if not yet broken out nor out of bounds.
builder.create<scf::ConditionOp>(loc, cond, args);
},
@@ -1476,8 +1475,7 @@ std::pair<Operation *, ValueRange> LoopEmitter::genSliceLvlTraverseLoop(
OpBuilder::InsertionGuard guard(builder);
// If coord >= sliceHi.
builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
- // Coordinates is OOB, just yield.
- builder.create<scf::YieldOp>(loc, ifRet);
+ YIELD(ifRet);
// If coord < sliceHi.
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
@@ -1485,7 +1483,7 @@ std::pair<Operation *, ValueRange> LoopEmitter::genSliceLvlTraverseLoop(
bodyBuilder(builder, loc, iv, ifRet);
if (genYield) {
builder.setInsertionPointToEnd(&ifOp.getThenRegion().front());
- builder.create<scf::YieldOp>(loc, ifRet);
+ YIELD(ifRet);
}
}
// Marks this special ifOp so that sparsification does not finalize it.
@@ -1494,11 +1492,10 @@ std::pair<Operation *, ValueRange> LoopEmitter::genSliceLvlTraverseLoop(
// Insertion point restored to after ifOp.
SmallVector<Value> yields;
// Increase induction variable.
- yields.push_back(builder.create<arith::AddIOp>(loc, iv, c1));
- // Terminates the while loop according to the continue flag.
+ yields.push_back(ADDI(iv, c1));
yields.push_back(cont);
yields.append(ifOp.getResults().begin(), ifOp.getResults().end());
- builder.create<scf::YieldOp>(loc, yields);
+ YIELD(yields);
});
builder.setInsertionPointAfter(whileOp);
@@ -1563,7 +1560,7 @@ ValueRange LoopEmitter::genUnResolvedSliceTreeTraverse(
innerArgs.assign(reduc.begin(), reduc.end());
})
.second;
- builder.create<scf::YieldOp>(loc, itArgs);
+ YIELD(itArgs);
});
} else if (isDenseDLT(lvlTypes[tid][firstLvl])) {
assert(firstLvl == 0); // This must be the first level.
@@ -1594,31 +1591,31 @@ ValueRange LoopEmitter::genUnResolvedSliceTreeTraverse(
Value offset = slice->offset;
Value sliceSz = sliceSizes[tid][sliceLvl][slice->depth - 1];
lbs.push_back(offset);
- ubs.push_back(builder.create<arith::AddIOp>(loc, offset, sliceSz));
+ ubs.push_back(ADDI(offset, sliceSz));
steps.push_back(c1);
lvlSzs.push_back(lvlSizes[tid][sliceLvl]);
}
- auto denseNest = scf::buildLoopNest(
- builder, loc, lbs, ubs, steps, innerArgs,
- [&innerArgs, &lvlSzs, &pos,
- bodyBuilder](OpBuilder &builder, Location loc, ValueRange ivs,
- ValueRange iterArgs) -> scf::ValueVector {
- for (auto em : llvm::enumerate(ivs)) {
- // Linearizes postion: pos = (pos * lvlsize) + iv;
- pos = builder.create<arith::MulIOp>(loc, pos, lvlSzs[em.index()]);
- pos = builder.create<arith::AddIOp>(loc, pos, em.value());
- }
- innerArgs.assign(iterArgs.begin(), iterArgs.end());
- // Generates user request loop body.
- // TODO: we do not have to check inbound for dense levels
- bodyBuilder(builder, loc, pos, innerArgs);
- return innerArgs;
- });
- builder.create<scf::YieldOp>(loc, denseNest.results);
+ auto denseNest =
+ scf::buildLoopNest(builder, loc, lbs, ubs, steps, innerArgs,
+ [&innerArgs, &lvlSzs, &pos, bodyBuilder](
+ OpBuilder &builder, Location loc, ValueRange ivs,
+ ValueRange iterArgs) -> scf::ValueVector {
+ for (auto em : llvm::enumerate(ivs)) {
+ // Linearizes position: pos = (pos * lvlSize) + iv;
+ pos = MULI(pos, lvlSzs[em.index()]);
+ pos = ADDI(pos, em.value());
+ }
+ innerArgs.assign(iterArgs.begin(), iterArgs.end());
+ // Generates the user-requested loop body.
+ bodyBuilder(builder, loc, pos, innerArgs);
+ return innerArgs;
+ });
+ YIELD(denseNest.results);
} else {
// Generates the user-requested loop body.
bodyBuilder(builder, loc, pos, innerArgs);
- builder.create<scf::YieldOp>(loc, innerArgs);
+ YIELD(innerArgs);
}
// Insert after current while operation.
builder.setInsertionPointAfter(outerMost);
@@ -1733,7 +1730,7 @@ void LoopEmitter::genUnResolvedSliceBegin(OpBuilder &builder, Location loc,
Value &minCrd = reduc[1];
Value &curMemSz = reduc[2];
- Value pHi = builder.create<arith::AddIOp>(loc, iv, c1);
+ Value pHi = ADDI(iv, c1);
Value sPLo = genIndexLoad(builder, loc, positionsBuffers[tid][lvl], iv);
Value sPHi =
genIndexLoad(builder, loc, positionsBuffers[tid][lvl], pHi);
@@ -1757,18 +1754,17 @@ void LoopEmitter::genUnResolvedSliceBegin(OpBuilder &builder, Location loc,
Value curC =
genIndexLoad(builder, loc, coordinatesBuffers[tid][lvl], sPLo);
Value isSmaller = CMPI(ult, curC, minCrd);
- Value newMin =
- builder.create<arith::SelectOp>(loc, isSmaller, curC, minCrd);
- builder.create<scf::YieldOp>(loc, newMin);
+ Value newMin = SELECT(isSmaller, curC, minCrd);
+ YIELD(newMin);
builder.setInsertionPointToStart(ifNonEmpty.elseBlock());
- builder.create<scf::YieldOp>(loc, minCrd);
+ YIELD(minCrd);
}
minCrd = ifNonEmpty.getResult(0);
builder.create<memref::StoreOp>(loc, sPLo, sPtrBuf, curMemSz);
- Value nxtMemSize = builder.create<arith::AddIOp>(loc, curMemSz, c1);
+ Value nxtMemSize = ADDI(curMemSz, c1);
builder.create<memref::StoreOp>(loc, sPHi, sPtrBuf, nxtMemSize);
- // updates the size of the memory curMemSize += 2
- curMemSz = builder.create<arith::AddIOp>(loc, curMemSz, c2);
+ // curMemSize += 2
+ curMemSz = ADDI(curMemSz, c2);
});
Value isNonEmpty = result[0];
@@ -1796,8 +1792,8 @@ bool LoopEmitter::genSliceBegin(OpBuilder &builder, Location loc, TensorId tid,
// algorithm to (co)iterates over the slice.
Value pLoPtr =
genIndexLoad(builder, loc, slicePosBuffer[tid][lvl].back(), c1);
- pLoPtr = builder.create<arith::AddIOp>(loc, pLoPtr, c2);
- Value pHiPtr = builder.create<arith::AddIOp>(loc, pLoPtr, c1);
+ pLoPtr = ADDI(pLoPtr, c2);
+ Value pHiPtr = ADDI(pLoPtr, c1);
posits[tid][lvl] =
genIndexLoad(builder, loc, slicePosBuffer[tid][lvl].back(), pLoPtr);
highs[tid][lvl] =
@@ -1845,14 +1841,14 @@ bool LoopEmitter::genSliceBegin(OpBuilder &builder, Location loc, TensorId tid,
auto depth = remDepOnLevel(tid, curLevel - 1);
assert(sliceSizes[tid][lvl].size() >= depth);
Value sz = *(sliceSizes[tid][lvl].rbegin() + depth - 1);
- bufSize = builder.create<arith::MulIOp>(loc, bufSize, sz);
+ bufSize = MULI(bufSize, sz);
}
// For a pair of [pLo, pHi]. Note that we cannot compress pHi because slicing
// creates segments in the index buffer, so the pHi for the current
// level is no longer the pLo for the next level.
- bufSize = builder.create<arith::MulIOp>(loc, bufSize, c2);
+ bufSize = MULI(bufSize, c2);
// Two additional metadata values {memSize, idx} at the head.
- bufSize = builder.create<arith::AddIOp>(loc, bufSize, c2);
+ bufSize = ADDI(bufSize, c2);
llvm::for_each(
slicePosBuffer[tid][lvl], [bufSize, loc, &builder](Value &cache) {
cache = genAlloca(builder, loc, bufSize, builder.getIndexType());
@@ -1923,9 +1919,9 @@ void LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc,
// return offset += 1
// }
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
- reduc[2] = builder.create<arith::AddIOp>(loc, absOffset, c1);
+ reduc[2] = ADDI(absOffset, c1);
// Yield offset + 1.
- builder.create<scf::YieldOp>(loc, reduc);
+ YIELD(reduc);
// else /*minCrd == offset*/ {
// for (i = 0; i < slicePos.size(); i+=2) {
@@ -1954,8 +1950,7 @@ void LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc,
Type idxTp = builder.getIndexType();
Value pLo = genIndexLoad(builder, loc, sPtrBuf, ivs.front());
Value pHi =
- genIndexLoad(builder, loc, sPtrBuf,
- builder.create<arith::AddIOp>(loc, ivs.front(), c1));
+ genIndexLoad(builder, loc, sPtrBuf, ADDI(ivs.front(), c1));
//
// if (pLo < pHi) // Only loads when inbound.
// coord = load[pLo]
@@ -1977,23 +1972,23 @@ void LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc,
/* if coord == minCrd */ {
builder.setInsertionPointToStart(
&ifEqual.getThenRegion().front());
- // pLo += 1.
- Value newPlo = builder.create<arith::AddIOp>(loc, pLo, c1);
+ Value newPlo = ADDI(pLo, c1);
+ // Updates the cache.
builder.create<memref::StoreOp>(loc, newPlo, sPtrBuf,
ivs.front());
- builder.create<scf::YieldOp>(loc, newPlo);
+ YIELD(newPlo);
}
/* else coord != minCrd */ {
builder.setInsertionPointToStart(
&ifEqual.getElseRegion().front());
- builder.create<scf::YieldOp>(loc, pLo);
+ YIELD(pLo);
}
builder.setInsertionPointAfter(ifEqual);
- builder.create<scf::YieldOp>(loc, ifEqual.getResults());
+ YIELD(ifEqual.getResults());
}
/* else pLo >= pHi */ {
builder.setInsertionPointToStart(&advPLo.getElseRegion().front());
- builder.create<scf::YieldOp>(loc, pLo);
+ YIELD(pLo);
}
builder.setInsertionPointAfter(advPLo);
@@ -2003,12 +1998,10 @@ void LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc,
auto newMin =
builder.create<scf::IfOp>(loc, idxTp, lvlNonEmpty, true);
builder.setInsertionPointToStart(&newMin.getThenRegion().front());
- builder.create<scf::YieldOp>(
- loc,
- genIndexLoad(builder, loc, coordinatesBuffers[tid][lvl], pLo));
+ YIELD(genIndexLoad(builder, loc, coordinatesBuffers[tid][lvl], pLo));
builder.setInsertionPointToStart(&newMin.getElseRegion().front());
- builder.create<scf::YieldOp>(loc, curMinCrd);
+ YIELD(curMinCrd);
builder.setInsertionPointAfter(newMin);
// isNonEmpty = isNonEmpty || lvlNonEmpty
@@ -2022,15 +2015,14 @@ void LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc,
builder.setInsertionPointAfter(forOp.loops.front());
// minOffset = minCrd + 1 >= size ? minCrd + 1 - size : c0
- Value tmp = builder.create<arith::AddIOp>(loc, forOp.results.front(), c1);
- Value minOffset = builder.create<arith::SubIOp>(
- loc, tmp, sliceSizes[tid][lvl][info.depth - 1]);
+ Value tmp = ADDI(forOp.results.front(), c1);
+ Value minOffset = SUBI(tmp, sliceSizes[tid][lvl][info.depth - 1]);
Value p = CMPI(uge, tmp, sliceSizes[tid][lvl][info.depth - 1]);
- minOffset = builder.create<arith::SelectOp>(loc, p, minOffset, c0);
+ minOffset = SELECT(p, minOffset, c0);
SmallVector<Value, 3> yields;
yields.assign(forOp.results.begin(), forOp.results.end());
yields.push_back(minOffset);
- builder.create<scf::YieldOp>(loc, yields);
+ YIELD(yields);
}
Value nextMinCrd = ifOp.getResults()[0];
@@ -2038,25 +2030,21 @@ void LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc,
// The next offset should at least be offset + 1;
Value minOffset = ifOp.getResults()[2];
- Value nxOffset = builder.create<arith::AddIOp>(loc, info.offset, c1);
+ Value nxOffset = ADDI(info.offset, c1);
Value maxPred = CMPI(ugt, minOffset, nxOffset);
- Value nextAbsOffset =
- builder.create<arith::SelectOp>(loc, maxPred, minOffset, nxOffset);
+ Value nextAbsOffset = SELECT(maxPred, minOffset, nxOffset);
- Value sliceUB = builder.create<arith::AddIOp>(
- loc, nextAbsOffset, sliceSizes[tid][lvl][info.depth - 1]);
+ Value sliceUB = ADDI(nextAbsOffset, sliceSizes[tid][lvl][info.depth - 1]);
// FIXME: this only works if there is only one parent.
assert(info.depth - 1 == 0);
// nextNonEmpty = nextNonEmpty && slice upper bound <= parent upper bound.
- nextNonEmpty = builder.create<arith::AndIOp>(
- loc, nextNonEmpty, CMPI(ule, sliceUB, lvlSizes[tid][lvl]));
+ nextNonEmpty = ANDI(nextNonEmpty, CMPI(ule, sliceUB, lvlSizes[tid][lvl]));
// FIXME: compute relative offset.
assert(info.depth - 1 == 0);
Value nextRelOffset = nextAbsOffset;
- nextRelOffset =
- builder.create<arith::SelectOp>(loc, nextNonEmpty, nextRelOffset, c0);
+ nextRelOffset = SELECT(nextNonEmpty, nextRelOffset, c0);
operands.push_back(nextNonEmpty);
operands.push_back(nextMinCrd);
@@ -2122,5 +2110,10 @@ Operation *LoopEmitter::emitSliceDrivenLoopOverTensorAtLvl(
}
#undef CMPI
-#undef ADDI
#undef C_IDX
+#undef YIELD
+#undef ADDI
+#undef ANDI
+#undef SUBI
+#undef MULI
+#undef SELECT
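A side note on the revised definitions: besides adding new shorthands, the patch parenthesizes every macro argument (compare the old and new CMPI above). The arguments here are Values, so this is mostly defensive hygiene, but it guards against the classic precedence pitfall of C/C++ function-like macros, illustrated here with plain integers (not from the patch):

#define TWICE_BAD(x) (2 * x)
#define TWICE_GOOD(x) (2 * (x))

int a = TWICE_BAD(1 + 2);  // expands to (2 * 1 + 2) == 4, not 6
int b = TWICE_GOOD(1 + 2); // expands to (2 * (1 + 2)) == 6

Undefining every macro at the end of the file keeps the shorthand strictly local to LoopEmitter.cpp.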