[Mlir-commits] [mlir] f528df8 - Revert "Add a test for UsedDeclVisitor"
Yaxun Liu
llvmlistbot at llvm.org
Wed Mar 18 21:17:06 PDT 2020
Author: Yaxun (Sam) Liu
Date: 2020-03-19T00:15:47-04:00
New Revision: f528df8e269df8b10de4a2458e99b42d6240ea91
URL: https://github.com/llvm/llvm-project/commit/f528df8e269df8b10de4a2458e99b42d6240ea91
DIFF: https://github.com/llvm/llvm-project/commit/f528df8e269df8b10de4a2458e99b42d6240ea91.diff
LOG: Revert "Add a test for UsedDeclVisitor"
This reverts commit b58f6bb1207521cdf6a856004525d7bffa5f2153.
Added:
Modified:
mlir/lib/Transforms/AffineDataCopyGeneration.cpp
Removed:
clang/test/CodeGenCXX/used-decl-visitor.cpp
################################################################################
diff --git a/clang/test/CodeGenCXX/used-decl-visitor.cpp b/clang/test/CodeGenCXX/used-decl-visitor.cpp
deleted file mode 100644
index 2b923ab562db..000000000000
--- a/clang/test/CodeGenCXX/used-decl-visitor.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-// RUN: %clang_cc1 -triple x86_64 -emit-llvm -o %t %s
-
-// Make sure there is no assertion due to UsedDeclVisitor.
-
-struct A {
- int a;
-};
-
-static A a;
-
-struct B {
- B(int b = a.a) {}
-};
-
-
-void foo() {
- B();
-}
diff --git a/mlir/lib/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Transforms/AffineDataCopyGeneration.cpp
index a7f90b0c19b9..5409c557da83 100644
--- a/mlir/lib/Transforms/AffineDataCopyGeneration.cpp
+++ b/mlir/lib/Transforms/AffineDataCopyGeneration.cpp
@@ -1,14 +1,268 @@
-struct A {
- int a;
-};
+//===- AffineDataCopyGeneration.cpp - Explicit memref copying pass ------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to automatically promote accessed memref regions
+// to buffers in a faster memory space that is explicitly managed, with the
+// necessary data movement operations performed through either regular
+// point-wise loads/stores or DMAs. Such explicit copying (also referred to as
+// array packing/unpacking in the literature), when done on arrays that exhibit
+// reuse, results in near elimination of conflict misses and TLB misses, and in
+// reduced use of hardware prefetch streams and reduced false sharing. It is
+// also necessary for hardware with explicitly managed levels in the memory
+// hierarchy, where DMAs may have to be used. This optimization is often
+// performed on already tiled code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/Utils.h"
+#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Transforms/Passes.h"
+#include "mlir/Transforms/Utils.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "affine-data-copy-generate"
+
+using namespace mlir;
+
+static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
+
+static llvm::cl::opt<unsigned long long> clFastMemoryCapacity(
+ "affine-data-copy-generate-fast-mem-capacity",
+ llvm::cl::desc(
+ "Set fast memory space capacity in KiB (default: unlimited)"),
+ llvm::cl::cat(clOptionsCategory));
+
+static llvm::cl::opt<bool>
+ clDma("affine-data-copy-generate-dma",
+ llvm::cl::desc("Generate DMA instead of point-wise copy"),
+ llvm::cl::cat(clOptionsCategory), llvm::cl::init(true));
+
+static llvm::cl::opt<unsigned> clFastMemorySpace(
+ "affine-data-copy-generate-fast-mem-space", llvm::cl::init(1),
+ llvm::cl::desc(
+ "Fast memory space identifier for copy generation (default: 1)"),
+ llvm::cl::cat(clOptionsCategory));
+
+static llvm::cl::opt<bool> clSkipNonUnitStrideLoop(
+ "affine-data-copy-generate-skip-non-unit-stride-loops", llvm::cl::Hidden,
+ llvm::cl::init(false),
+ llvm::cl::desc("Testing purposes: avoid non-unit stride loop choice depths "
+ "for copy placement"),
+ llvm::cl::cat(clOptionsCategory));
-static A a;
+namespace {
-struct B {
- B(int b = a.a) {}
+/// Replaces all loads and stores on memrefs living in 'slowMemorySpace' by
+/// introducing copy operations to transfer data into `fastMemorySpace` and
+/// rewriting the original loads/stores to instead load/store from the
+/// allocated fast memory buffers. Additional options specify the identifier
+/// corresponding to the fast memory space and the amount of fast memory space
+/// available. The pass traverses the nesting structure, recursing to inner
+/// levels if necessary to determine at what depth copies need to be placed so
+/// that the allocated buffers fit within the memory capacity provided.
+// TODO(bondhugula): We currently can't generate copies correctly when stores
+// are strided. Check for strided stores.
+struct AffineDataCopyGeneration
+ : public FunctionPass<AffineDataCopyGeneration> {
+ explicit AffineDataCopyGeneration(
+ unsigned slowMemorySpace = 0,
+ unsigned fastMemorySpace = clFastMemorySpace, unsigned tagMemorySpace = 0,
+ int minDmaTransferSize = 1024,
+ uint64_t fastMemCapacityBytes =
+ (clFastMemoryCapacity.getNumOccurrences() > 0
+ ? clFastMemoryCapacity * 1024 // cl-provided size is in KiB
+ : std::numeric_limits<uint64_t>::max()),
+ bool generateDma = clDma,
+ bool skipNonUnitStrideLoops = clSkipNonUnitStrideLoop)
+ : slowMemorySpace(slowMemorySpace), fastMemorySpace(fastMemorySpace),
+ tagMemorySpace(tagMemorySpace), minDmaTransferSize(minDmaTransferSize),
+ fastMemCapacityBytes(fastMemCapacityBytes), generateDma(generateDma),
+ skipNonUnitStrideLoops(skipNonUnitStrideLoops) {}
+
+ explicit AffineDataCopyGeneration(const AffineDataCopyGeneration &other)
+ : slowMemorySpace(other.slowMemorySpace),
+ fastMemorySpace(other.fastMemorySpace),
+ tagMemorySpace(other.tagMemorySpace),
+ minDmaTransferSize(other.minDmaTransferSize),
+ fastMemCapacityBytes(other.fastMemCapacityBytes),
+ generateDma(other.generateDma),
+ skipNonUnitStrideLoops(other.skipNonUnitStrideLoops) {}
+
+ void runOnFunction() override;
+ LogicalResult runOnBlock(Block *block, DenseSet<Operation *> &copyNests);
+
+ // Slow memory space associated with copies.
+ const unsigned slowMemorySpace;
+ // Fast memory space associated with copies.
+ unsigned fastMemorySpace;
+ // Memory space associated with DMA tags.
+ unsigned tagMemorySpace;
+ // Minimum DMA transfer size supported by the target in bytes.
+ const int minDmaTransferSize;
+ // Capacity of the faster memory space.
+ uint64_t fastMemCapacityBytes;
+
+ // If set, generate DMA operations instead of read/write.
+ bool generateDma;
+
+ // If set, ignore loops with steps other than 1.
+ bool skipNonUnitStrideLoops;
+
+ // Constant zero index to avoid too many duplicates.
+ Value zeroIndex = nullptr;
};
+} // end anonymous namespace
-void foo() {
- B();
+/// Generates copies for memrefs living in 'slowMemorySpace' into newly created
+/// buffers in 'fastMemorySpace', and replaces memory operations to the former
+/// by the latter. Only load ops are handled for now.
+/// TODO(bondhugula): extend this to store ops.
+std::unique_ptr<OpPassBase<FuncOp>> mlir::createAffineDataCopyGenerationPass(
+ unsigned slowMemorySpace, unsigned fastMemorySpace, unsigned tagMemorySpace,
+ int minDmaTransferSize, uint64_t fastMemCapacityBytes) {
+ return std::make_unique<AffineDataCopyGeneration>(
+ slowMemorySpace, fastMemorySpace, tagMemorySpace, minDmaTransferSize,
+ fastMemCapacityBytes);
}
+
+/// Generate copies for this block. The block is partitioned into separate
+/// ranges: each range is either a sequence of one or more operations starting
+/// and ending with an affine load or store op, or just an affine.for op (which
+/// could have other affine.for ops nested within).
+LogicalResult
+AffineDataCopyGeneration::runOnBlock(Block *block,
+ DenseSet<Operation *> &copyNests) {
+ if (block->empty())
+ return success();
+
+ AffineCopyOptions copyOptions = {generateDma, slowMemorySpace,
+ fastMemorySpace, tagMemorySpace,
+ fastMemCapacityBytes};
+
+ // Every affine.for op in the block starts and ends a block range for copying;
+ // in addition, a contiguous sequence of operations starting with a
+ // load/store op but not including any copy nests themselves is also
+ // identified as a copy block range. Straight-line code (a contiguous chunk of
+ // operations excluding AffineForOps) is always assumed to not exhaust memory.
+ // As a result, this approach is conservative in some cases at the moment; we
+ // do a check later and report an error with location info.
+ // TODO(bondhugula): An 'affine.if' operation is currently treated like any
+ // other non-loop operation; 'affine.if's could have 'affine.for's in them and
+ // should be treated separately.
+
+ // Get to the first load, store, or for op (that is not a copy nest itself).
+ auto curBegin =
+ std::find_if(block->begin(), block->end(), [&](Operation &op) {
+ return (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op) ||
+ isa<AffineForOp>(op)) &&
+ copyNests.count(&op) == 0;
+ });
+
+ // Create [begin, end) ranges.
+ auto it = curBegin;
+ while (it != block->end()) {
+ AffineForOp forOp;
+ // If we hit a non-copy 'for' loop, we split the range there.
+ if ((forOp = dyn_cast<AffineForOp>(&*it)) && copyNests.count(forOp) == 0) {
+ // Perform the copying up until this 'for' op first.
+ affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/it, copyOptions,
+ /*filterMemRef=*/llvm::None, copyNests);
+
+ // Returns true if the footprint is known to exceed capacity.
+ auto exceedsCapacity = [&](AffineForOp forOp) {
+ Optional<int64_t> footprint =
+ getMemoryFootprintBytes(forOp,
+ /*memorySpace=*/0);
+ return (footprint.hasValue() &&
+ static_cast<uint64_t>(footprint.getValue()) >
+ fastMemCapacityBytes);
+ };
+
+ // If the memory footprint of the 'affine.for' loop is higher than fast
+ // memory capacity (when provided), we recurse to copy at an inner level
+ // until we find a depth at which footprint fits in fast mem capacity. If
+ // the footprint can't be calculated, we assume for now it fits. Recurse
+ // inside if footprint for 'forOp' exceeds capacity, or when
+ // skipNonUnitStrideLoops is set and the step size is not one.
+ bool recurseInner = skipNonUnitStrideLoops ? forOp.getStep() != 1
+ : exceedsCapacity(forOp);
+ if (recurseInner) {
+ // We'll recurse and do the copies at an inner level for 'forOp'.
+ // Recurse into the body of this loop.
+ runOnBlock(forOp.getBody(), copyNests);
+ } else {
+ // We have enough capacity, i.e., copies will be computed for the
+ // portion of the block until 'it', and for 'it', which is 'forOp'. Note
+ // that for the latter, the copies are placed just before this loop (for
+ // incoming copies) and right after (for outgoing ones).
+
+ // Inner loop copies have their own scope, so we don't update the consumed
+ // capacity here. The footprint check above guarantees this inner loop's
+ // footprint fits.
+ affineDataCopyGenerate(/*begin=*/it, /*end=*/std::next(it), copyOptions,
+ /*filterMemRef=*/llvm::None, copyNests);
+ }
+ // Get to the next load or store op after 'forOp'.
+ curBegin = std::find_if(std::next(it), block->end(), [&](Operation &op) {
+ return (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op) ||
+ isa<AffineForOp>(op)) &&
+ copyNests.count(&op) == 0;
+ });
+ it = curBegin;
+ } else {
+ assert(copyNests.count(&*it) == 0 &&
+ "all copy nests generated should have been skipped above");
+ // We simply include this op in the current range and continue for more.
+ ++it;
+ }
+ }
+
+ // Generate the copy for the final block range.
+ if (curBegin != block->end()) {
+ // Can't be a terminator because it would have been skipped above.
+ assert(!curBegin->isKnownTerminator() && "can't be a terminator");
+ // Exclude the affine terminator - hence, the std::prev.
+ affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/std::prev(block->end()),
+ copyOptions, /*filterMemRef=*/llvm::None, copyNests);
+ }
+
+ return success();
+}
+
+void AffineDataCopyGeneration::runOnFunction() {
+ FuncOp f = getFunction();
+ OpBuilder topBuilder(f.getBody());
+ zeroIndex = topBuilder.create<ConstantIndexOp>(f.getLoc(), 0);
+
+ // Nests that are copy-in's or copy-out's; the root AffineForOps of those
+ // nests are stored herein.
+ DenseSet<Operation *> copyNests;
+
+ // Clear recorded copy nests.
+ copyNests.clear();
+
+ for (auto &block : f)
+ runOnBlock(&block, copyNests);
+
+ // Promote any single iteration loops in the copy nests.
+ for (auto nest : copyNests) {
+ nest->walk([](AffineForOp forOp) { promoteIfSingleIteration(forOp); });
+ }
+}
+
+static PassRegistration<AffineDataCopyGeneration>
+ pass("affine-data-copy-generate",
+ "Generate explicit copying for memory operations");