[Mlir-commits] [mlir] d811aee - [MLIR][NFC] update/clean up affine PDT, related utils, its test case
Uday Bondhugula
llvmlistbot@llvm.org
Mon Mar 16 17:43:15 PDT 2020
Author: Uday Bondhugula
Date: 2020-03-17T06:12:16+05:30
New Revision: d811aee5d9c1f8417a59222a7bd274a94e33f7b9
URL: https://github.com/llvm/llvm-project/commit/d811aee5d9c1f8417a59222a7bd274a94e33f7b9
DIFF: https://github.com/llvm/llvm-project/commit/d811aee5d9c1f8417a59222a7bd274a94e33f7b9.diff
LOG: [MLIR][NFC] update/clean up affine PDT, related utils, its test case
- rename vars that had inst suffixes (due to ops earlier being
known as insts); other renames for better readability
- drop unnecessary matches in test cases
- iterate without block terminator
- comment/doc updates
- instBodySkew -> affineForOpBodySkew
Signed-off-by: Uday Bondhugula <uday@polymagelabs.com>
Differential Revision: https://reviews.llvm.org/D76214
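
For context on the rename, here is a minimal, hedged usage sketch of the new entry point, modeled on how PipelineDataTransfer.cpp builds its shift vector in the diff below. The helper name pipelineStyleSkew is hypothetical, and the affine dialect ops header (AffineForOp, AffineDmaStartOp) is intentionally elided because its path differs across MLIR revisions; the calls themselves (getBody(), without_terminator(), affineForOpBodySkew) all appear in this commit.

#include "mlir/Transforms/LoopUtils.h"
// Also include the affine dialect ops header for AffineForOp/AffineDmaStartOp;
// its exact path varies across MLIR revisions, so it is elided here.

#include <vector>

using namespace mlir;

// Hypothetical helper: shift everything by one iteration except DMA starts,
// then skew the loop body with the renamed utility.
static LogicalResult pipelineStyleSkew(AffineForOp forOp) {
  // One slot per body op, as in PipelineDataTransfer.cpp; the slot that would
  // correspond to the block terminator is simply never read.
  std::vector<uint64_t> shifts(forOp.getBody()->getOperations().size(), 1);
  unsigned s = 0;
  for (Operation &op : forOp.getBody()->without_terminator())
    shifts[s++] = isa<AffineDmaStartOp>(op) ? 0 : 1; // DMA starts keep shift 0.
  return affineForOpBodySkew(forOp, shifts, /*unrollPrologueEpilogue=*/false);
}

The order of entries in shifts must match the program order of the body's operations, which is why both the pass and this sketch walk getBody()->without_terminator() while filling it.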
Added:
Modified:
mlir/include/mlir/Transforms/LoopUtils.h
mlir/lib/Transforms/PipelineDataTransfer.cpp
mlir/lib/Transforms/Utils/LoopUtils.cpp
mlir/test/Transforms/pipeline-data-transfer.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h
index 72db5e625aa2..479b7ce81a42 100644
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Transforms/LoopUtils.h
@@ -79,13 +79,14 @@ void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
AffineMap *map, SmallVectorImpl<Value> *operands,
OpBuilder &builder);
-/// Skew the operations in the body of a 'affine.for' operation with the
+/// Skew the operations in the body of an affine.for operation with the
/// specified operation-wise shifts. The shifts are with respect to the
/// original execution order, and are multiplied by the loop 'step' before being
-/// applied.
+/// applied. If `unrollPrologueEpilogue` is set, fully unroll the prologue and
+/// epilogue loops when possible.
LLVM_NODISCARD
-LogicalResult instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
- bool unrollPrologueEpilogue = false);
+LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
+ bool unrollPrologueEpilogue = false);
/// Tiles the specified band of perfectly nested loops creating tile-space loops
/// and intra-tile loops. A band is a contiguous set of loops.
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp
index 7057ecb85383..39874b1bc44a 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp
@@ -22,6 +22,7 @@
#include "mlir/Transforms/Utils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Debug.h"
+
#define DEBUG_TYPE "affine-pipeline-data-transfer"
using namespace mlir;
@@ -46,9 +47,9 @@ std::unique_ptr<OpPassBase<FuncOp>> mlir::createPipelineDataTransferPass() {
// Returns the position of the tag memref operand given a DMA operation.
// Temporary utility: will be replaced when DmaStart/DmaFinish abstract op's are
// added. TODO(b/117228571)
-static unsigned getTagMemRefPos(Operation &dmaInst) {
- assert(isa<AffineDmaStartOp>(dmaInst) || isa<AffineDmaWaitOp>(dmaInst));
- if (auto dmaStartOp = dyn_cast<AffineDmaStartOp>(dmaInst)) {
+static unsigned getTagMemRefPos(Operation &dmaOp) {
+ assert(isa<AffineDmaStartOp>(dmaOp) || isa<AffineDmaWaitOp>(dmaOp));
+ if (auto dmaStartOp = dyn_cast<AffineDmaStartOp>(dmaOp)) {
return dmaStartOp.getTagMemRefOperandIndex();
}
// First operand for a dma finish operation.
@@ -79,21 +80,20 @@ static bool doubleBuffer(Value oldMemRef, AffineForOp forOp) {
auto oldMemRefType = oldMemRef.getType().cast<MemRefType>();
auto newMemRefType = doubleShape(oldMemRefType);
- // The double buffer is allocated right before 'forInst'.
- auto *forInst = forOp.getOperation();
- OpBuilder bOuter(forInst);
+ // The double buffer is allocated right before 'forOp'.
+ OpBuilder bOuter(forOp);
// Put together alloc operands for any dynamic dimensions of the memref.
SmallVector<Value, 4> allocOperands;
unsigned dynamicDimCount = 0;
for (auto dimSize : oldMemRefType.getShape()) {
if (dimSize == -1)
- allocOperands.push_back(bOuter.create<DimOp>(forInst->getLoc(), oldMemRef,
- dynamicDimCount++));
+ allocOperands.push_back(
+ bOuter.create<DimOp>(forOp.getLoc(), oldMemRef, dynamicDimCount++));
}
// Create and place the alloc right before the 'affine.for' operation.
Value newMemRef =
- bOuter.create<AllocOp>(forInst->getLoc(), newMemRefType, allocOperands);
+ bOuter.create<AllocOp>(forOp.getLoc(), newMemRefType, allocOperands);
// Create 'iv mod 2' value to index the leading dimension.
auto d0 = bInner.getAffineDimExpr(0);
@@ -118,8 +118,8 @@ static bool doubleBuffer(Value oldMemRef, AffineForOp forOp) {
return false;
}
// Insert the dealloc op right after the for loop.
- bOuter.setInsertionPointAfter(forInst);
- bOuter.create<DeallocOp>(forInst->getLoc(), newMemRef);
+ bOuter.setInsertionPointAfter(forOp);
+ bOuter.create<DeallocOp>(forOp.getLoc(), newMemRef);
return true;
}
@@ -219,11 +219,11 @@ static void findMatchingStartFinishInsts(
}
// For each start operation, we look for a matching finish operation.
- for (auto *dmaStartInst : dmaStartInsts) {
- for (auto *dmaFinishInst : dmaFinishInsts) {
- if (checkTagMatch(cast<AffineDmaStartOp>(dmaStartInst),
- cast<AffineDmaWaitOp>(dmaFinishInst))) {
- startWaitPairs.push_back({dmaStartInst, dmaFinishInst});
+ for (auto *dmaStartOp : dmaStartInsts) {
+ for (auto *dmaFinishOp : dmaFinishInsts) {
+ if (checkTagMatch(cast<AffineDmaStartOp>(dmaStartOp),
+ cast<AffineDmaWaitOp>(dmaFinishOp))) {
+ startWaitPairs.push_back({dmaStartOp, dmaFinishOp});
break;
}
}
@@ -236,8 +236,7 @@ static void findMatchingStartFinishInsts(
void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
auto mayBeConstTripCount = getConstantTripCount(forOp);
if (!mayBeConstTripCount.hasValue()) {
- LLVM_DEBUG(
- forOp.emitRemark("won't pipeline due to unknown trip count loop"));
+ LLVM_DEBUG(forOp.emitRemark("won't pipeline due to unknown trip count"));
return;
}
@@ -258,14 +257,14 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
// the dimension we are adding here for the double buffering is the outermost
// dimension.
for (auto &pair : startWaitPairs) {
- auto *dmaStartInst = pair.first;
- Value oldMemRef = dmaStartInst->getOperand(
- cast<AffineDmaStartOp>(dmaStartInst).getFasterMemPos());
+ auto *dmaStartOp = pair.first;
+ Value oldMemRef = dmaStartOp->getOperand(
+ cast<AffineDmaStartOp>(dmaStartOp).getFasterMemPos());
if (!doubleBuffer(oldMemRef, forOp)) {
// Normally, double buffering should not fail because we already checked
// that there are no uses outside.
LLVM_DEBUG(llvm::dbgs()
- << "double buffering failed for" << dmaStartInst << "\n";);
+ << "double buffering failed for" << dmaStartOp << "\n";);
// IR still valid and semantically correct.
return;
}
@@ -275,13 +274,13 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
// order to create the double buffer above.)
// '-canonicalize' does this in a more general way, but we'll anyway do the
// simple/common case so that the output / test cases looks clear.
- if (auto *allocInst = oldMemRef.getDefiningOp()) {
+ if (auto *allocOp = oldMemRef.getDefiningOp()) {
if (oldMemRef.use_empty()) {
- allocInst->erase();
+ allocOp->erase();
} else if (oldMemRef.hasOneUse()) {
if (auto dealloc = dyn_cast<DeallocOp>(*oldMemRef.user_begin())) {
dealloc.erase();
- allocInst->erase();
+ allocOp->erase();
}
}
}
@@ -289,22 +288,21 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
// Double the buffers for tag memrefs.
for (auto &pair : startWaitPairs) {
- auto *dmaFinishInst = pair.second;
- Value oldTagMemRef =
- dmaFinishInst->getOperand(getTagMemRefPos(*dmaFinishInst));
+ auto *dmaFinishOp = pair.second;
+ Value oldTagMemRef = dmaFinishOp->getOperand(getTagMemRefPos(*dmaFinishOp));
if (!doubleBuffer(oldTagMemRef, forOp)) {
LLVM_DEBUG(llvm::dbgs() << "tag double buffering failed\n";);
return;
}
// If the old tag has no uses or a single dealloc use, remove it.
// (canonicalization handles more complex cases).
- if (auto *tagAllocInst = oldTagMemRef.getDefiningOp()) {
+ if (auto *tagAllocOp = oldTagMemRef.getDefiningOp()) {
if (oldTagMemRef.use_empty()) {
- tagAllocInst->erase();
+ tagAllocOp->erase();
} else if (oldTagMemRef.hasOneUse()) {
if (auto dealloc = dyn_cast<DeallocOp>(*oldTagMemRef.user_begin())) {
dealloc.erase();
- tagAllocInst->erase();
+ tagAllocOp->erase();
}
}
}
@@ -317,12 +315,12 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
// Store shift for operation for later lookup for AffineApplyOp's.
DenseMap<Operation *, unsigned> instShiftMap;
for (auto &pair : startWaitPairs) {
- auto *dmaStartInst = pair.first;
- assert(isa<AffineDmaStartOp>(dmaStartInst));
- instShiftMap[dmaStartInst] = 0;
+ auto *dmaStartOp = pair.first;
+ assert(isa<AffineDmaStartOp>(dmaStartOp));
+ instShiftMap[dmaStartOp] = 0;
// Set shifts for DMA start op's affine operand computation slices to 0.
SmallVector<AffineApplyOp, 4> sliceOps;
- mlir::createAffineComputationSlice(dmaStartInst, &sliceOps);
+ mlir::createAffineComputationSlice(dmaStartOp, &sliceOps);
if (!sliceOps.empty()) {
for (auto sliceOp : sliceOps) {
instShiftMap[sliceOp.getOperation()] = 0;
@@ -331,7 +329,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
// If a slice wasn't created, the reachable affine.apply op's from its
// operands are the ones that go with it.
SmallVector<Operation *, 4> affineApplyInsts;
- SmallVector<Value, 4> operands(dmaStartInst->getOperands());
+ SmallVector<Value, 4> operands(dmaStartOp->getOperands());
getReachableAffineApplyOps(operands, affineApplyInsts);
for (auto *op : affineApplyInsts) {
instShiftMap[op] = 0;
@@ -339,16 +337,14 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
}
}
// Everything else (including compute ops and dma finish) are shifted by one.
- for (auto &op : *forOp.getBody()) {
- if (instShiftMap.find(&op) == instShiftMap.end()) {
+ for (auto &op : forOp.getBody()->without_terminator())
+ if (instShiftMap.find(&op) == instShiftMap.end())
instShiftMap[&op] = 1;
- }
- }
// Get shifts stored in map.
std::vector<uint64_t> shifts(forOp.getBody()->getOperations().size());
unsigned s = 0;
- for (auto &op : *forOp.getBody()) {
+ for (auto &op : forOp.getBody()->without_terminator()) {
assert(instShiftMap.find(&op) != instShiftMap.end());
shifts[s++] = instShiftMap[&op];
@@ -365,7 +361,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
return;
}
- if (failed(instBodySkew(forOp, shifts))) {
+ if (failed(affineForOpBodySkew(forOp, shifts))) {
LLVM_DEBUG(llvm::dbgs() << "op body skewing failed - unexpected\n";);
return;
}
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index 0df8837503d7..96b4e72eff48 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -156,65 +156,57 @@ void mlir::promoteSingleIterationLoops(FuncOp f) {
f.walk([](AffineForOp forOp) { promoteIfSingleIteration(forOp); });
}
-/// Generates a 'affine.for' op with the specified lower and upper bounds
-/// while generating the right IV remappings for the shifted operations. The
-/// operation blocks that go into the loop are specified in instGroupQueue
-/// starting from the specified offset, and in that order; the first element of
-/// the pair specifies the shift applied to that group of operations; note
-/// that the shift is multiplied by the loop step before being applied. Returns
-/// nullptr if the generated loop simplifies to a single iteration one.
-static AffineForOp
-generateLoop(AffineMap lbMap, AffineMap ubMap,
- const std::vector<std::pair<uint64_t, ArrayRef<Operation *>>>
- &instGroupQueue,
- unsigned offset, AffineForOp srcForInst, OpBuilder b) {
- auto lbOperands = srcForInst.getLowerBoundOperands();
- auto ubOperands = srcForInst.getUpperBoundOperands();
+/// Generates an affine.for op with the specified lower and upper bounds
+/// while generating the right IV remappings to realize shifts for operations in
+/// its body. The operations that go into the loop body are specified in
+/// opGroupQueue starting from the specified offset, and in that order. The
+/// first element of the pair specifies the shift applied to that group of
+/// operations; the shift is multiplied by the loop step before being applied.
+/// Returns nullptr if the generated loop simplifies to a single iteration one.
+static AffineForOp generateShiftedLoop(
+ AffineMap lbMap, AffineMap ubMap,
+ const std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> &opGroupQueue,
+ unsigned offset, AffineForOp srcForOp, OpBuilder b) {
+ auto lbOperands = srcForOp.getLowerBoundOperands();
+ auto ubOperands = srcForOp.getUpperBoundOperands();
assert(lbMap.getNumInputs() == lbOperands.size());
assert(ubMap.getNumInputs() == ubOperands.size());
- auto loopChunk =
- b.create<AffineForOp>(srcForInst.getLoc(), lbOperands, lbMap, ubOperands,
- ubMap, srcForInst.getStep());
+ auto loopChunk = b.create<AffineForOp>(srcForOp.getLoc(), lbOperands, lbMap,
+ ubOperands, ubMap, srcForOp.getStep());
auto loopChunkIV = loopChunk.getInductionVar();
- auto srcIV = srcForInst.getInductionVar();
+ auto srcIV = srcForOp.getInductionVar();
BlockAndValueMapping operandMap;
OpBuilder bodyBuilder = loopChunk.getBodyBuilder();
- for (auto it = instGroupQueue.begin() + offset, e = instGroupQueue.end();
- it != e; ++it) {
+ for (auto it = opGroupQueue.begin() + offset, e = opGroupQueue.end(); it != e;
+ ++it) {
uint64_t shift = it->first;
- auto insts = it->second;
+ auto ops = it->second;
// All 'same shift' operations get added with their operands being
// remapped to results of cloned operations, and their IV used remapped.
// Generate the remapping if the shift is not zero: remappedIV = newIV -
// shift.
if (!srcIV.use_empty() && shift != 0) {
auto ivRemap = bodyBuilder.create<AffineApplyOp>(
- srcForInst.getLoc(),
+ srcForOp.getLoc(),
bodyBuilder.getSingleDimShiftAffineMap(
- -static_cast<int64_t>(srcForInst.getStep() * shift)),
+ -static_cast<int64_t>(srcForOp.getStep() * shift)),
loopChunkIV);
operandMap.map(srcIV, ivRemap);
} else {
operandMap.map(srcIV, loopChunkIV);
}
- for (auto *op : insts) {
- if (!isa<AffineTerminatorOp>(op))
- bodyBuilder.clone(*op, operandMap);
- }
+ for (auto *op : ops)
+ bodyBuilder.clone(*op, operandMap);
};
if (succeeded(promoteIfSingleIteration(loopChunk)))
return AffineForOp();
return loopChunk;
}
-/// Skew the operations in the body of a 'affine.for' operation with the
-/// specified operation-wise shifts. The shifts are with respect to the
-/// original execution order, and are multiplied by the loop 'step' before being
-/// applied. A shift of zero for each operation will lead to no change.
// The skewing of operations with respect to one another can be used for
// example to allow overlap of asynchronous operations (such as DMA
// communication) with computation, or just relative shifting of operations
@@ -226,8 +218,9 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
// asserts preservation of SSA dominance. A check for that as well as that for
// memory-based dependence preservation check rests with the users of this
// method.
-LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
- bool unrollPrologueEpilogue) {
+LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
+ ArrayRef<uint64_t> shifts,
+ bool unrollPrologueEpilogue) {
if (forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
return success();
@@ -263,11 +256,11 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
// An array of operation groups sorted by shift amount; each group has all
// operations with the same shift in the order in which they appear in the
// body of the 'affine.for' op.
- std::vector<std::vector<Operation *>> sortedInstGroups(maxShift + 1);
+ std::vector<std::vector<Operation *>> sortedOpGroups(maxShift + 1);
unsigned pos = 0;
- for (auto &op : *forOp.getBody()) {
+ for (auto &op : forOp.getBody()->without_terminator()) {
auto shift = shifts[pos++];
- sortedInstGroups[shift].push_back(&op);
+ sortedOpGroups[shift].push_back(&op);
}
// Unless the shifts have a specific pattern (which actually would be the
@@ -275,40 +268,39 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
// Nevertheless, if 'unrollPrologueEpilogue' is set, we will treat the first
// loop generated as the prologue and the last as epilogue and unroll these
// fully.
- AffineForOp prologue;
- AffineForOp epilogue;
+ AffineForOp prologue, epilogue;
// Do a sweep over the sorted shifts while storing open groups in a
// vector, and generating loop portions as necessary during the sweep. A block
// of operations is paired with its shift.
- std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> instGroupQueue;
+ std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> opGroupQueue;
auto origLbMap = forOp.getLowerBoundMap();
uint64_t lbShift = 0;
OpBuilder b(forOp.getOperation());
- for (uint64_t d = 0, e = sortedInstGroups.size(); d < e; ++d) {
+ for (uint64_t d = 0, e = sortedOpGroups.size(); d < e; ++d) {
// If nothing is shifted by d, continue.
- if (sortedInstGroups[d].empty())
+ if (sortedOpGroups[d].empty())
continue;
- if (!instGroupQueue.empty()) {
+ if (!opGroupQueue.empty()) {
assert(d >= 1 &&
"Queue expected to be empty when the first block is found");
// The interval for which the loop needs to be generated here is:
// [lbShift, min(lbShift + tripCount, d)) and the body of the
- // loop needs to have all operations in instQueue in that order.
+ // loop needs to have all operations in opQueue in that order.
AffineForOp res;
if (lbShift + tripCount * step < d * step) {
- res = generateLoop(
+ res = generateShiftedLoop(
b.getShiftedAffineMap(origLbMap, lbShift),
b.getShiftedAffineMap(origLbMap, lbShift + tripCount * step),
- instGroupQueue, 0, forOp, b);
+ opGroupQueue, /*offset=*/0, forOp, b);
// Entire loop for the queued op groups generated, empty it.
- instGroupQueue.clear();
+ opGroupQueue.clear();
lbShift += tripCount * step;
} else {
- res = generateLoop(b.getShiftedAffineMap(origLbMap, lbShift),
- b.getShiftedAffineMap(origLbMap, d), instGroupQueue,
- 0, forOp, b);
+ res = generateShiftedLoop(b.getShiftedAffineMap(origLbMap, lbShift),
+ b.getShiftedAffineMap(origLbMap, d),
+ opGroupQueue, /*offset=*/0, forOp, b);
lbShift = d * step;
}
if (!prologue && res)
@@ -319,16 +311,16 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
lbShift = d * step;
}
// Augment the list of operations that get into the current open interval.
- instGroupQueue.push_back({d, sortedInstGroups[d]});
+ opGroupQueue.push_back({d, sortedOpGroups[d]});
}
// Those operations groups left in the queue now need to be processed (FIFO)
// and their loops completed.
- for (unsigned i = 0, e = instGroupQueue.size(); i < e; ++i) {
- uint64_t ubShift = (instGroupQueue[i].first + tripCount) * step;
- epilogue = generateLoop(b.getShiftedAffineMap(origLbMap, lbShift),
- b.getShiftedAffineMap(origLbMap, ubShift),
- instGroupQueue, i, forOp, b);
+ for (unsigned i = 0, e = opGroupQueue.size(); i < e; ++i) {
+ uint64_t ubShift = (opGroupQueue[i].first + tripCount) * step;
+ epilogue = generateShiftedLoop(b.getShiftedAffineMap(origLbMap, lbShift),
+ b.getShiftedAffineMap(origLbMap, ubShift),
+ opGroupQueue, /*offset=*/i, forOp, b);
lbShift = ubShift;
if (!prologue)
prologue = epilogue;
diff --git a/mlir/test/Transforms/pipeline-data-transfer.mlir b/mlir/test/Transforms/pipeline-data-transfer.mlir
index 8293120d50e8..a2ff12625520 100644
--- a/mlir/test/Transforms/pipeline-data-transfer.mlir
+++ b/mlir/test/Transforms/pipeline-data-transfer.mlir
@@ -36,23 +36,23 @@ func @loop_nest_dma() {
// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}} mod 2, %{{.*}}], %{{.*}}[%{{.*}} mod 2, 0], %{{.*}} : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
// CHECK-NEXT: affine.for %{{.*}} = 1 to 8 {
// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[%{{.*}} mod 2, %{{.*}}], %{{.*}}[%{{.*}} mod 2, 0], %{{.*}} : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
-// CHECK-NEXT: %{{.*}} = affine.apply [[MAP_MINUS_1]](%{{.*}})
-// CHECK-NEXT: %{{.*}} = affine.apply [[MOD_2]](%{{.*}})
-// CHECK-NEXT: %{{.*}} = affine.apply [[MOD_2]](%{{.*}})
+// CHECK-NEXT: affine.apply [[MAP_MINUS_1]](%{{.*}})
+// CHECK-NEXT: affine.apply [[MOD_2]](%{{.*}})
+// CHECK-NEXT: affine.apply [[MOD_2]](%{{.*}})
// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}} mod 2, 0], %{{.*}} : memref<2x1xf32>
-// CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}} mod 2, %{{.*}}] : memref<2x32xf32, 1>
-// CHECK-NEXT: %{{.*}} = "compute"(%{{.*}}) : (f32) -> f32
+// CHECK-NEXT: affine.load %{{.*}}[%{{.*}} mod 2, %{{.*}}] : memref<2x32xf32, 1>
+// CHECK-NEXT: "compute"(%{{.*}}) : (f32) -> f32
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}} mod 2, %{{.*}}] : memref<2x32xf32, 1>
// CHECK-NEXT: affine.for %{{.*}} = 0 to 32 {
// CHECK-NEXT: "do_more_compute"(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
-// CHECK-NEXT: %{{.*}} = affine.apply [[MAP_MINUS_1]](%{{.*}})
-// CHECK-NEXT: %{{.*}} = affine.apply [[MOD_2]](%{{.*}})
-// CHECK-NEXT: %{{.*}} = affine.apply [[MOD_2]](%{{.*}})
+// CHECK-NEXT: affine.apply [[MAP_MINUS_1]](%{{.*}})
+// CHECK-NEXT: affine.apply [[MOD_2]](%{{.*}})
+// CHECK-NEXT: affine.apply [[MOD_2]](%{{.*}})
// CHECK-NEXT: affine.dma_wait %{{.*}}[%{{.*}} mod 2, 0], %{{.*}} : memref<2x1xf32>
-// CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}} mod 2, %{{.*}}] : memref<2x32xf32, 1>
-// CHECK-NEXT: %{{.*}} = "compute"(%{{.*}}) : (f32) -> f32
+// CHECK-NEXT: affine.load %{{.*}}[%{{.*}} mod 2, %{{.*}}] : memref<2x32xf32, 1>
+// CHECK-NEXT: "compute"(%{{.*}}) : (f32) -> f32
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}} mod 2, %{{.*}}] : memref<2x32xf32, 1>
// CHECK-NEXT: affine.for %{{.*}} = 0 to 32 {
// CHECK-NEXT: "do_more_compute"(%{{.*}}, %{{.*}}) : (index, index) -> ()
@@ -89,8 +89,8 @@ func @loop_step(%arg0: memref<512xf32>,
// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[(%{{.*}} floordiv 4) mod 2, 0], [[TAG]][(%{{.*}} floordiv 4) mod 2, 0], %{{.*}} : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
// CHECK-NEXT: affine.for %{{.*}} = 4 to 512 step 4 {
// CHECK-NEXT: affine.dma_start %{{.*}}[%{{.*}}], %{{.*}}[(%{{.*}} floordiv 4) mod 2, 0], [[TAG]][(%{{.*}} floordiv 4) mod 2, 0], %{{.*}} : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
-// CHECK-NEXT: %{{.*}} = affine.apply [[REMAP_SHIFT_MINUS_4]](%{{.*}})
-// CHECK-NEXT: %{{.*}} = affine.apply [[FLOOR_MOD_2]](%{{.*}})
+// CHECK-NEXT: affine.apply [[REMAP_SHIFT_MINUS_4]](%{{.*}})
+// CHECK-NEXT: affine.apply [[FLOOR_MOD_2]](%{{.*}})
// CHECK: affine.dma_wait [[TAG]][(%{{.*}} floordiv 4) mod 2, 0], %{{.*}} : memref<2x1xi32>
// CHECK-NEXT: "compute"(%{{.*}}) : (index) -> ()
// CHECK-NEXT: }
@@ -313,7 +313,7 @@ func @live_out_use(%arg0: memref<512 x 32 x f32>) -> f32 {
dealloc %tag : memref<1 x i32>
dealloc %Av : memref<32 x 32 x f32, 2>
return %v : f32
-// CHECK: %{{[0-9]+}} = affine.load %{{[0-9]+}}[%{{.*}}, %{{.*}}] : memref<32x32xf32, 2>
+// CHECK: affine.load %{{[0-9]+}}[%{{.*}}, %{{.*}}] : memref<32x32xf32, 2>
// CHECK: return
}
@@ -329,10 +329,10 @@ func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) {
%tag = alloc() : memref<1 x i32>
// Double buffering for dynamic shaped buffer.
-// CHECK: %{{.*}} = alloc(%{{.*}}, %{{.*}}) : memref<?x?xf32, 2>
-// CHECK-NEXT: %{{.*}} = dim %{{.*}}, 0 : memref<?x?xf32, 2>
-// CHECK-NEXT: %{{.*}} = dim %{{.*}}, 1 : memref<?x?xf32, 2>
-// CHECK-NEXT: %{{.*}} = alloc(%{{.*}}, %{{.*}}) : memref<2x?x?xf32, 2>
+// CHECK: alloc(%{{.*}}, %{{.*}}) : memref<?x?xf32, 2>
+// CHECK-NEXT: dim %{{.*}}, 0 : memref<?x?xf32, 2>
+// CHECK-NEXT: dim %{{.*}}, 1 : memref<?x?xf32, 2>
+// CHECK-NEXT: alloc(%{{.*}}, %{{.*}}) : memref<2x?x?xf32, 2>
// CHECK: affine.dma_start %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}}[%{{.*}} mod 2, 0, 0], %{{.*}}[%{{.*}} mod 2, 0], %{{.*}}
affine.for %kTT = 0 to 16 {
affine.dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %tag[%zero], %num_elt :
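
To make the control flow of the skewing utility easier to follow, below is a small, self-contained C++ illustration (not MLIR code; the variable names and stand-in op ids are made up) of the sweep that affineForOpBodySkew performs over shift groups, reduced to printing which groups each generated loop chunk would cover for a unit-step loop with trip count 8. In the real code, chunks that end up with a single iteration are promoted by promoteIfSingleIteration, which is why the prologue and epilogue in the test case above appear as straight-line ops rather than loops.

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Stand-in op groups indexed by shift, as the pass sets them up: DMA starts
  // get shift 0, everything else (compute, DMA waits) gets shift 1.
  std::vector<std::vector<int>> sortedOpGroups = {{/*dma starts*/ 10},
                                                  {/*compute, waits*/ 20, 21}};
  const uint64_t tripCount = 8; // unit step for simplicity

  std::vector<std::pair<uint64_t, std::vector<int>>> opGroupQueue;
  uint64_t lbShift = 0;
  auto emit = [&](uint64_t lb, uint64_t ub, unsigned offset) {
    std::printf("loop over [lb+%llu, lb+%llu) with groups:",
                (unsigned long long)lb, (unsigned long long)ub);
    for (unsigned i = offset, e = opGroupQueue.size(); i < e; ++i)
      std::printf(" shift=%llu", (unsigned long long)opGroupQueue[i].first);
    std::printf("\n");
  };

  // Sweep over shift amounts, keeping the currently open groups in a queue.
  for (uint64_t d = 0, e = sortedOpGroups.size(); d < e; ++d) {
    if (sortedOpGroups[d].empty())
      continue;
    if (!opGroupQueue.empty()) {
      if (lbShift + tripCount < d) {
        // The queued groups finish all their iterations before shift d.
        emit(lbShift, lbShift + tripCount, /*offset=*/0);
        opGroupQueue.clear();
        lbShift += tripCount;
      } else {
        emit(lbShift, d, /*offset=*/0);
        lbShift = d;
      }
    } else {
      lbShift = d; // Start of the first interval.
    }
    opGroupQueue.push_back({d, sortedOpGroups[d]});
  }
  // Drain the remaining open groups FIFO, completing their loops.
  for (unsigned i = 0, e = opGroupQueue.size(); i < e; ++i) {
    uint64_t ubShift = opGroupQueue[i].first + tripCount;
    emit(lbShift, ubShift, /*offset=*/i);
    lbShift = ubShift;
  }
  return 0;
}

// Output:
//   loop over [lb+0, lb+1) with groups: shift=0           (prologue)
//   loop over [lb+1, lb+8) with groups: shift=0 shift=1   (steady state)
//   loop over [lb+8, lb+9) with groups: shift=1           (epilogue)

This matches the structure checked in pipeline-data-transfer.mlir: a lone DMA start before the loop, an affine.for from 1 to 8 overlapping DMA issue with compute on the previous iteration's data, and a trailing wait-plus-compute epilogue.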