[llvm] 400ad6f - [mlir] Eliminate the remaining usages of cl::opt instead of PassOption.
River Riddle via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 8 13:09:31 PDT 2020
Author: River Riddle
Date: 2020-04-08T13:05:08-07:00
New Revision: 400ad6f95d7a8d425463571f5c24efc562677ce7
URL: https://github.com/llvm/llvm-project/commit/400ad6f95d7a8d425463571f5c24efc562677ce7
DIFF: https://github.com/llvm/llvm-project/commit/400ad6f95d7a8d425463571f5c24efc562677ce7.diff
LOG: [mlir] Eliminate the remaining usages of cl::opt instead of PassOption.
Summary: Pass options are a better choice for various reasons and avoid the need for static constructors.
Differential Revision: https://reviews.llvm.org/D77707
Added:
Modified:
llvm/include/llvm/Support/CommandLine.h
mlir/include/mlir/Dialect/Affine/Passes.h
mlir/include/mlir/Dialect/Affine/Passes.td
mlir/include/mlir/IR/OperationSupport.h
mlir/include/mlir/Pass/PassOptions.h
mlir/include/mlir/Transforms/Passes.td
mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
mlir/lib/IR/AsmPrinter.cpp
mlir/lib/Transforms/Inliner.cpp
mlir/lib/Transforms/LoopFusion.cpp
mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
mlir/lib/Transforms/ViewOpGraph.cpp
mlir/test/Dialect/Affine/affine-data-copy.mlir
mlir/test/Dialect/Affine/dma-generate.mlir
mlir/test/Dialect/Affine/inlining.mlir
mlir/test/Dialect/Affine/loop-tiling.mlir
mlir/test/Dialect/Affine/unroll-jam.mlir
mlir/test/Dialect/Affine/unroll.mlir
mlir/test/Dialect/SPIRV/Transforms/inlining.mlir
mlir/test/Transforms/inlining.mlir
mlir/test/Transforms/loop-fusion.mlir
mlir/test/lib/Pass/TestPassManager.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 0242e35c05bd..2c87d8fbbaad 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -1606,8 +1606,8 @@ template <class DataType> class list_storage<DataType, bool> {
reference front() { return Storage.front(); }
const_reference front() const { return Storage.front(); }
- operator std::vector<DataType>&() { return Storage; }
- operator ArrayRef<DataType>() { return Storage; }
+ operator std::vector<DataType> &() { return Storage; }
+ operator ArrayRef<DataType>() const { return Storage; }
std::vector<DataType> *operator&() { return &Storage; }
const std::vector<DataType> *operator&() const { return &Storage; }
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index 5d3b997b9b58..0d7c3be240c9 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -59,7 +59,7 @@ std::unique_ptr<OperationPass<FuncOp>> createLoopTilingPass();
/// and no callback is provided, anything passed from the command-line (if at
/// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
std::unique_ptr<OperationPass<FuncOp>> createLoopUnrollPass(
- int unrollFactor = -1, int unrollFull = -1,
+ int unrollFactor = -1, bool unrollFull = false,
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
/// Creates a loop unroll jam pass to unroll jam by the specified factor. A
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 8fef9961510a..06e0920413a9 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -18,6 +18,28 @@ include "mlir/Pass/PassBase.td"
def AffineDataCopyGeneration : FunctionPass<"affine-data-copy-generate"> {
let summary = "Generate explicit copying for affine memory operations";
let constructor = "mlir::createAffineDataCopyGenerationPass()";
+ let options = [
+ Option<"fastMemoryCapacity", "fast-mem-capacity", "uint64_t",
+ /*default=*/"std::numeric_limits<uint64_t>::max()",
+ "Set fast memory space capacity in KiB (default: unlimited)">,
+ Option<"fastMemorySpace", "fast-mem-space", "unsigned",
+ /*default=*/"1",
+ "Fast memory space identifier for copy generation (default: 1)">,
+ Option<"generateDma", "generate-dma", "bool",
+ /*default=*/"true", "Generate DMA instead of point-wise copy">,
+ Option<"minDmaTransferSize", "min-dma-transfer", "int",
+ /*default=*/"1024",
+ "Minimum DMA transfer size supported by the target in bytes">,
+ Option<"slowMemorySpace", "slow-mem-space", "unsigned",
+ /*default=*/"0",
+ "Slow memory space identifier for copy generation (default: 0)">,
+ Option<"skipNonUnitStrideLoops", "skip-non-unit-stride-loops", "bool",
+ /*default=*/"false", "Testing purposes: avoid non-unit stride loop "
+ "choice depths for copy placement">,
+ Option<"tagMemorySpace", "tag-mem-space", "unsigned",
+ /*default=*/"0",
+ "Tag memory space identifier for copy generation (default: 0)">,
+ ];
}
def AffineLoopInvariantCodeMotion
@@ -29,16 +51,44 @@ def AffineLoopInvariantCodeMotion
def AffineLoopTiling : FunctionPass<"affine-loop-tile"> {
let summary = "Tile affine loop nests";
let constructor = "mlir::createLoopTilingPass()";
+ let options = [
+ Option<"cacheSizeInKiB", "cache-size", "uint64_t", /*default=*/"512",
+ "Set size of cache to tile for in KiB">,
+ Option<"separate", "separate", "bool", /*default=*/"",
+ "Separate full and partial tiles">,
+ Option<"tileSize", "tile-size", "unsigned", /*default=*/"",
+ "Use this tile size for all loops">,
+ ListOption<"tileSizes", "tile-sizes", "unsigned",
+ "List of tile sizes for each perfect nest "
+ "(overridden by -tile-size)",
+ "llvm::cl::ZeroOrMore">,
+ ];
}
def AffineLoopUnroll : FunctionPass<"affine-loop-unroll"> {
let summary = "Unroll affine loops";
let constructor = "mlir::createLoopUnrollPass()";
+ let options = [
+ Option<"unrollFactor", "unroll-factor", "unsigned", /*default=*/"4",
+ "Use this unroll factor for all loops being unrolled">,
+ Option<"unrollFull", "unroll-full", "bool", /*default=*/"false",
+ "Fully unroll loops">,
+ Option<"numRepetitions", "unroll-num-reps", "unsigned", /*default=*/"1",
+ "Unroll innermost loops repeatedly this many times">,
+ Option<"unrollFullThreshold", "unroll-full-threshold", "unsigned",
+ /*default=*/"1",
+ "Unroll all loops with trip count less than or equal to this">,
+ ];
}
def AffineLoopUnrollAndJam : FunctionPass<"affine-loop-unroll-jam"> {
let summary = "Unroll and jam affine loops";
let constructor = "mlir::createLoopUnrollAndJamPass()";
+ let options = [
+ Option<"unrollJamFactor", "unroll-jam-factor", "unsigned",
+ /*default=*/"4",
+ "Use this unroll jam factor for all loops (default 4)">,
+ ];
}
def AffineVectorize : FunctionPass<"affine-super-vectorize"> {
diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h
index 892888622cd6..8d6bda3dbba0 100644
--- a/mlir/include/mlir/IR/OperationSupport.h
+++ b/mlir/include/mlir/IR/OperationSupport.h
@@ -514,6 +514,9 @@ class OpPrintingFlags {
/// Return if the given ElementsAttr should be elided.
bool shouldElideElementsAttr(ElementsAttr attr) const;
+ /// Return the size limit for printing large ElementsAttr.
+ Optional<int64_t> getLargeElementsAttrLimit() const;
+
/// Return if debug information should be printed.
bool shouldPrintDebugInfo() const;
diff --git a/mlir/include/mlir/Pass/PassOptions.h b/mlir/include/mlir/Pass/PassOptions.h
index d360bad673fa..bb2592d08d72 100644
--- a/mlir/include/mlir/Pass/PassOptions.h
+++ b/mlir/include/mlir/Pass/PassOptions.h
@@ -42,6 +42,9 @@ class PassOptions : protected llvm::cl::SubCommand {
/// Return the argument string of this option.
StringRef getArgStr() const { return getOption()->ArgStr; }
+ /// Returns true if this option has any value assigned to it.
+ bool hasValue() const { return optHasValue; }
+
protected:
/// Return the main option instance.
virtual const llvm::cl::Option *getOption() const = 0;
@@ -49,6 +52,9 @@ class PassOptions : protected llvm::cl::SubCommand {
/// Copy the value from the given option into this one.
virtual void copyValueFrom(const OptionBase &other) = 0;
+ /// Flag indicating if this option has a value.
+ bool optHasValue = false;
+
/// Allow access to private methods.
friend PassOptions;
};
@@ -113,10 +119,17 @@ class PassOptions : protected llvm::cl::SubCommand {
assert(!this->isPositional() && !this->isSink() &&
"sink and positional options are not supported");
parent.options.push_back(this);
+
+ // Set a callback to track if this option has a value.
+ this->setCallback([this](const auto &) { this->optHasValue = true; });
}
+ ~Option() override = default;
using llvm::cl::opt<DataType, /*ExternalStorage=*/false,
OptionParser>::operator=;
- ~Option() override = default;
+ Option &operator=(const Option &other) {
+ *this = other.getValue();
+ return *this;
+ }
private:
/// Return the main option instance.
@@ -132,6 +145,7 @@ class PassOptions : protected llvm::cl::SubCommand {
void copyValueFrom(const OptionBase &other) final {
this->setValue(static_cast<const Option<DataType, OptionParser> &>(other)
.getValue());
+ optHasValue = other.optHasValue;
}
};
@@ -149,16 +163,26 @@ class PassOptions : protected llvm::cl::SubCommand {
assert(!this->isPositional() && !this->isSink() &&
"sink and positional options are not supported");
parent.options.push_back(this);
+
+ // Set a callback to track if this option has a value.
+ this->setCallback([this](const auto &) { this->optHasValue = true; });
}
~ListOption() override = default;
+ ListOption<DataType, OptionParser> &
+ operator=(const ListOption<DataType, OptionParser> &other) {
+ *this = ArrayRef<DataType>(other);
+ this->optHasValue = other.optHasValue;
+ return *this;
+ }
/// Allow assigning from an ArrayRef.
ListOption<DataType, OptionParser> &operator=(ArrayRef<DataType> values) {
- (*this)->assign(values.begin(), values.end());
+ ((std::vector<DataType> &)*this).assign(values.begin(), values.end());
+ optHasValue = true;
return *this;
}
- std::vector<DataType> *operator->() { return &*this; }
+ MutableArrayRef<DataType> operator->() const { return &*this; }
private:
/// Return the main option instance.
@@ -175,9 +199,7 @@ class PassOptions : protected llvm::cl::SubCommand {
/// Copy the value from the given option into this one.
void copyValueFrom(const OptionBase &other) final {
- (*this) = ArrayRef<DataType>(
- (ListOption<DataType, OptionParser> &)(const_cast<OptionBase &>(
- other)));
+ *this = static_cast<const ListOption<DataType, OptionParser> &>(other);
}
};
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 04cf0877c338..70b8f8113703 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -15,6 +15,24 @@
include "mlir/Pass/PassBase.td"
+def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> {
+ let summary = "Fuse affine loop nests";
+ let constructor = "mlir::createLoopFusionPass()";
+ let options = [
+ Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
+ /*default=*/"0.30f", "Fractional increase in additional computation "
+ "tolerated while fusing">,
+ Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
+ /*default=*/"0",
+ "Faster memory space number to promote fusion buffers to">,
+ Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
+ /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
+ "to fast memory space">,
+ Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
+ "Enables maximal loop fusion">,
+ ];
+}
+
def AffinePipelineDataTransfer
: FunctionPass<"affine-pipeline-data-transfer"> {
let summary = "Pipeline non-blocking data transfers between explicitly "
@@ -84,11 +102,6 @@ def AffinePipelineDataTransfer
let constructor = "mlir::createPipelineDataTransferPass()";
}
-def AffineLoopFusion : FunctionPass<"affine-loop-fusion"> {
- let summary = "Fuse affine loop nests";
- let constructor = "mlir::createLoopFusionPass()";
-}
-
def Canonicalizer : Pass<"canonicalize"> {
let summary = "Canonicalize operations";
let constructor = "mlir::createCanonicalizerPass()";
@@ -106,6 +119,14 @@ def CSE : Pass<"cse"> {
def Inliner : Pass<"inline"> {
let summary = "Inline function calls";
let constructor = "mlir::createInlinerPass()";
+ let options = [
+ Option<"disableCanonicalization", "disable-simplify", "bool",
+ /*default=*/"false",
+ "Disable running simplifications during inlining">,
+ Option<"maxInliningIterations", "max-iterations", "unsigned",
+ /*default=*/"4",
+ "Maximum number of iterations when inlining within an SCC">,
+ ];
}
def LocationSnapshot : Pass<"snapshot-op-locations"> {
@@ -113,7 +134,7 @@ def LocationSnapshot : Pass<"snapshot-op-locations"> {
let constructor = "mlir::createLocationSnapshotPass()";
let options = [
Option<"fileName", "filename", "std::string", /*default=*/"",
- "The filename to print the generated IR.">,
+ "The filename to print the generated IR">,
Option<"tag", "tag", "std::string", /*default=*/"",
"A tag to use when fusing the new locations with the "
"original. If unset, the locations are replaced.">,
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
index 9ea64d9384c6..a597dd7bf078 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp
@@ -61,8 +61,8 @@ struct ImperfectlyNestedForLoopMapper
ImperfectlyNestedForLoopMapper() = default;
ImperfectlyNestedForLoopMapper(ArrayRef<int64_t> numWorkGroups,
ArrayRef<int64_t> workGroupSize) {
- this->numWorkGroups->assign(numWorkGroups.begin(), numWorkGroups.end());
- this->workGroupSize->assign(workGroupSize.begin(), workGroupSize.end());
+ this->numWorkGroups = numWorkGroups;
+ this->workGroupSize = workGroupSize;
}
void runOnFunction() override {
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
index a0525470d9ed..bff60f417082 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
@@ -35,32 +35,6 @@
using namespace mlir;
-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-static llvm::cl::opt<unsigned long long> clFastMemoryCapacity(
- "affine-data-copy-generate-fast-mem-capacity",
- llvm::cl::desc(
- "Set fast memory space capacity in KiB (default: unlimited)"),
- llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<bool>
- clDma("affine-data-copy-generate-dma",
- llvm::cl::desc("Generate DMA instead of point-wise copy"),
- llvm::cl::cat(clOptionsCategory), llvm::cl::init(true));
-
-static llvm::cl::opt<unsigned> clFastMemorySpace(
- "affine-data-copy-generate-fast-mem-space", llvm::cl::init(1),
- llvm::cl::desc(
- "Fast memory space identifier for copy generation (default: 1)"),
- llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<bool> clSkipNonUnitStrideLoop(
- "affine-data-copy-generate-skip-non-unit-stride-loops", llvm::cl::Hidden,
- llvm::cl::init(false),
- llvm::cl::desc("Testing purposes: avoid non-unit stride loop choice depths "
- "for copy placement"),
- llvm::cl::cat(clOptionsCategory));
-
namespace {
/// Replaces all loads and stores on memref's living in 'slowMemorySpace' by
@@ -76,51 +50,22 @@ namespace {
// are strided. Check for strided stores.
struct AffineDataCopyGeneration
: public AffineDataCopyGenerationBase<AffineDataCopyGeneration> {
- explicit AffineDataCopyGeneration(
- unsigned slowMemorySpace = 0,
- unsigned fastMemorySpace = clFastMemorySpace, unsigned tagMemorySpace = 0,
- int minDmaTransferSize = 1024,
- uint64_t fastMemCapacityBytes =
- (clFastMemoryCapacity.getNumOccurrences() > 0
- ? clFastMemoryCapacity * 1024 // cl-provided size is in KiB
- : std::numeric_limits<uint64_t>::max()),
- bool generateDma = clDma,
- bool skipNonUnitStrideLoops = clSkipNonUnitStrideLoop)
- : slowMemorySpace(slowMemorySpace), fastMemorySpace(fastMemorySpace),
- tagMemorySpace(tagMemorySpace), minDmaTransferSize(minDmaTransferSize),
- fastMemCapacityBytes(fastMemCapacityBytes), generateDma(generateDma),
- skipNonUnitStrideLoops(skipNonUnitStrideLoops) {}
-
- explicit AffineDataCopyGeneration(const AffineDataCopyGeneration &other)
- : AffineDataCopyGenerationBase<AffineDataCopyGeneration>(other),
- slowMemorySpace(other.slowMemorySpace),
- fastMemorySpace(other.fastMemorySpace),
- tagMemorySpace(other.tagMemorySpace),
- minDmaTransferSize(other.minDmaTransferSize),
- fastMemCapacityBytes(other.fastMemCapacityBytes),
- generateDma(other.generateDma),
- skipNonUnitStrideLoops(other.skipNonUnitStrideLoops) {}
+ AffineDataCopyGeneration() = default;
+ explicit AffineDataCopyGeneration(unsigned slowMemorySpace,
+ unsigned fastMemorySpace,
+ unsigned tagMemorySpace,
+ int minDmaTransferSize,
+ uint64_t fastMemCapacityBytes) {
+ this->slowMemorySpace = slowMemorySpace;
+ this->fastMemorySpace = fastMemorySpace;
+ this->tagMemorySpace = tagMemorySpace;
+ this->minDmaTransferSize = minDmaTransferSize;
+ this->fastMemoryCapacity = fastMemCapacityBytes / 1024;
+ }
void runOnFunction() override;
LogicalResult runOnBlock(Block *block, DenseSet<Operation *> &copyNests);
- // Slow memory space associated with copies.
- const unsigned slowMemorySpace;
- // Fast memory space associated with copies.
- unsigned fastMemorySpace;
- // Memory space associated with DMA tags.
- unsigned tagMemorySpace;
- // Minimum DMA transfer size supported by the target in bytes.
- const int minDmaTransferSize;
- // Capacity of the faster memory space.
- uint64_t fastMemCapacityBytes;
-
- // If set, generate DMA operations instead of read/write.
- bool generateDma;
-
- // If set, ignore loops with steps other than 1.
- bool skipNonUnitStrideLoops;
-
// Constant zero index to avoid too many duplicates.
Value zeroIndex = nullptr;
};
@@ -153,6 +98,10 @@ AffineDataCopyGeneration::runOnBlock(Block *block,
if (block->empty())
return success();
+ uint64_t fastMemCapacityBytes =
+ fastMemoryCapacity != std::numeric_limits<uint64_t>::max()
+ ? fastMemoryCapacity * 1024
+ : fastMemoryCapacity;
AffineCopyOptions copyOptions = {generateDma, slowMemorySpace,
fastMemorySpace, tagMemorySpace,
fastMemCapacityBytes};
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
index 0e8e10983151..b15a73720c1b 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
@@ -28,40 +28,15 @@ using namespace mlir;
#define DEBUG_TYPE "affine-loop-tile"
-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-static llvm::cl::opt<unsigned long long>
- clCacheSizeKiB("affine-tile-cache-size",
- llvm::cl::desc("Set size of cache to tile for in KiB"),
- llvm::cl::cat(clOptionsCategory));
-
-// Separate full and partial tiles.
-static llvm::cl::opt<bool>
- clSeparate("affine-tile-separate",
- llvm::cl::desc("Separate full and partial tiles"),
- llvm::cl::cat(clOptionsCategory));
-
-// Tile size to use for all loops (overrides -tile-sizes if provided).
-static llvm::cl::opt<unsigned>
- clTileSize("affine-tile-size",
- llvm::cl::desc("Use this tile size for all loops"),
- llvm::cl::cat(clOptionsCategory));
-
-// List of tile sizes. If any of them aren't provided, they are filled with
-// clTileSize / kDefaultTileSize.
-static llvm::cl::list<unsigned> clTileSizes(
- "affine-tile-sizes",
- llvm::cl::desc(
- "List of tile sizes for each perfect nest (overridden by -tile-size)"),
- llvm::cl::ZeroOrMore, llvm::cl::cat(clOptionsCategory));
-
namespace {
/// A pass to perform loop tiling on all suitable loop nests of a Function.
struct LoopTiling : public AffineLoopTilingBase<LoopTiling> {
- explicit LoopTiling(uint64_t cacheSizeBytes = kDefaultCacheMemCapacity,
- bool avoidMaxMinBounds = true)
- : cacheSizeBytes(cacheSizeBytes), avoidMaxMinBounds(avoidMaxMinBounds) {}
+ LoopTiling() = default;
+ explicit LoopTiling(uint64_t cacheSizeBytes, bool avoidMaxMinBounds = true)
+ : avoidMaxMinBounds(avoidMaxMinBounds) {
+ this->cacheSizeInKiB = cacheSizeBytes / 1024;
+ }
void runOnFunction() override;
void getTileSizes(ArrayRef<AffineForOp> band,
@@ -69,12 +44,9 @@ struct LoopTiling : public AffineLoopTilingBase<LoopTiling> {
// Default tile size if nothing is provided.
constexpr static unsigned kDefaultTileSize = 4;
- constexpr static uint64_t kDefaultCacheMemCapacity = 512 * 1024UL;
- // Capacity of the cache to tile for.
- uint64_t cacheSizeBytes;
// If true, tile sizes are set to avoid max/min in bounds if possible.
- bool avoidMaxMinBounds;
+ bool avoidMaxMinBounds = true;
};
} // end anonymous namespace
@@ -316,23 +288,19 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
if (band.empty())
return;
- tileSizes->resize(band.size());
-
- // Use clTileSize for all loops if specified.
- if (clTileSize.getNumOccurrences() > 0) {
- std::fill(tileSizes->begin(), tileSizes->end(), clTileSize);
+ // Use tileSize for all loops if specified.
+ if (tileSize.hasValue()) {
+ tileSizes->assign(band.size(), tileSize);
return;
}
- // Use clTileSizes and fill them with default tile size if it's short.
- if (!clTileSizes.empty()) {
- std::fill(tileSizes->begin(), tileSizes->end(),
- LoopTiling::kDefaultTileSize);
- std::copy(clTileSizes.begin(),
- clTileSizes.begin() + std::min(clTileSizes.size(), band.size()),
- tileSizes->begin());
+ // Use tileSizes and fill them with default tile size if it's short.
+ if (!this->tileSizes.empty()) {
+ tileSizes->assign(this->tileSizes.begin(), this->tileSizes.end());
+ tileSizes->resize(band.size(), kDefaultTileSize);
return;
}
+ tileSizes->resize(band.size());
// The first loop in the band.
auto rootForOp = band[0];
@@ -356,6 +324,7 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
}
// Check how many times larger the cache size is when compared to footprint.
+ uint64_t cacheSizeBytes = cacheSizeInKiB * 1024;
uint64_t excessFactor = llvm::divideCeil(fp.getValue(), cacheSizeBytes);
if (excessFactor <= 1) {
// No need of any tiling - set tile size to 1.
@@ -388,10 +357,6 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
}
void LoopTiling::runOnFunction() {
- // Override cache size if provided on command line.
- if (clCacheSizeKiB.getNumOccurrences() > 0)
- cacheSizeBytes = clCacheSizeKiB * 1024;
-
// Bands of loops to tile.
std::vector<SmallVector<AffineForOp, 6>> bands;
getTileableBands(getFunction(), &bands);
@@ -399,7 +364,7 @@ void LoopTiling::runOnFunction() {
// Tile each band.
for (auto &band : bands) {
// Set up tile sizes; fill missing tile sizes at the end with default tile
- // size or clTileSize if one was provided.
+ // size or tileSize if one was provided.
SmallVector<unsigned, 6> tileSizes;
getTileSizes(band, &tileSizes);
if (llvm::DebugFlag) {
@@ -413,7 +378,7 @@ void LoopTiling::runOnFunction() {
return signalPassFailure();
// Separate full and partial tiles.
- if (clSeparate) {
+ if (separate) {
auto intraTileLoops =
MutableArrayRef<AffineForOp>(tiledNest).drop_front(band.size());
separateFullTiles(intraTileLoops);
@@ -422,4 +387,3 @@ void LoopTiling::runOnFunction() {
}
constexpr unsigned LoopTiling::kDefaultTileSize;
-constexpr uint64_t LoopTiling::kDefaultCacheMemCapacity;
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index f02cf36ee5a7..1dcbc81f640a 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -59,24 +59,27 @@ namespace {
/// with trip count less than the specified threshold. The latter is for testing
/// purposes, especially for testing outer loop unrolling.
struct LoopUnroll : public AffineLoopUnrollBase<LoopUnroll> {
- const Optional<unsigned> unrollFactor;
- const Optional<bool> unrollFull;
// Callback to obtain unroll factors; if this has a callable target, takes
// precedence over command-line argument or passed argument.
const std::function<unsigned(AffineForOp)> getUnrollFactor;
+ LoopUnroll() : getUnrollFactor(nullptr) {}
+ LoopUnroll(const LoopUnroll &other)
+ : AffineLoopUnrollBase<LoopUnroll>(other),
+ getUnrollFactor(other.getUnrollFactor) {}
explicit LoopUnroll(
- Optional<unsigned> unrollFactor = None, Optional<bool> unrollFull = None,
+ Optional<unsigned> unrollFactor = None, bool unrollFull = false,
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr)
- : unrollFactor(unrollFactor), unrollFull(unrollFull),
- getUnrollFactor(getUnrollFactor) {}
+ : getUnrollFactor(getUnrollFactor) {
+ if (unrollFactor)
+ this->unrollFactor = *unrollFactor;
+ this->unrollFull = unrollFull;
+ }
void runOnFunction() override;
/// Unroll this for op. Returns failure if nothing was done.
LogicalResult runOnAffineForOp(AffineForOp forOp);
-
- static const unsigned kDefaultUnrollFactor = 4;
};
} // end anonymous namespace
@@ -102,8 +105,7 @@ static void gatherInnermostLoops(FuncOp f,
}
void LoopUnroll::runOnFunction() {
- if (clUnrollFull.getNumOccurrences() > 0 &&
- clUnrollFullThreshold.getNumOccurrences() > 0) {
+ if (unrollFull && unrollFullThreshold.hasValue()) {
// Store short loops as we walk.
SmallVector<AffineForOp, 4> loops;
@@ -112,7 +114,7 @@ void LoopUnroll::runOnFunction() {
// an outer one may delete gathered inner ones).
getFunction().walk([&](AffineForOp forOp) {
Optional<uint64_t> tripCount = getConstantTripCount(forOp);
- if (tripCount.hasValue() && tripCount.getValue() <= clUnrollFullThreshold)
+ if (tripCount.hasValue() && tripCount.getValue() <= unrollFullThreshold)
loops.push_back(forOp);
});
for (auto forOp : loops)
@@ -120,9 +122,6 @@ void LoopUnroll::runOnFunction() {
return;
}
- unsigned numRepetitions = clUnrollNumRepetitions.getNumOccurrences() > 0
- ? clUnrollNumRepetitions
- : 1;
// If the call back is provided, we will recurse until no loops are found.
FuncOp func = getFunction();
SmallVector<AffineForOp, 4> loops;
@@ -144,28 +143,19 @@ void LoopUnroll::runOnFunction() {
/// failure otherwise. The default unroll factor is 4.
LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
// Use the function callback if one was provided.
- if (getUnrollFactor) {
+ if (getUnrollFactor)
return loopUnrollByFactor(forOp, getUnrollFactor(forOp));
- }
- // Unroll by the factor passed, if any.
- if (unrollFactor.hasValue())
- return loopUnrollByFactor(forOp, unrollFactor.getValue());
- // Unroll by the command line factor if one was specified.
- if (clUnrollFactor.getNumOccurrences() > 0)
- return loopUnrollByFactor(forOp, clUnrollFactor);
// Unroll completely if full loop unroll was specified.
- if (clUnrollFull.getNumOccurrences() > 0 ||
- (unrollFull.hasValue() && unrollFull.getValue()))
+ if (unrollFull)
return loopUnrollFull(forOp);
-
- // Unroll by four otherwise.
- return loopUnrollByFactor(forOp, kDefaultUnrollFactor);
+ // Otherwise, unroll by the given unroll factor.
+ return loopUnrollByFactor(forOp, unrollFactor);
}
std::unique_ptr<OperationPass<FuncOp>> mlir::createLoopUnrollPass(
- int unrollFactor, int unrollFull,
+ int unrollFactor, bool unrollFull,
const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
return std::make_unique<LoopUnroll>(
- unrollFactor == -1 ? None : Optional<unsigned>(unrollFactor),
- unrollFull == -1 ? None : Optional<bool>(unrollFull), getUnrollFactor);
+ unrollFactor == -1 ? None : Optional<unsigned>(unrollFactor), unrollFull,
+ getUnrollFactor);
}
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
index dffc7c0932e9..28830dac3660 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
@@ -49,27 +49,16 @@ using namespace mlir;
#define DEBUG_TYPE "affine-loop-unroll-jam"
-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-// Loop unroll and jam factor.
-static llvm::cl::opt<unsigned>
- clUnrollJamFactor("unroll-jam-factor", llvm::cl::Hidden,
- llvm::cl::desc("Use this unroll jam factor for all loops"
- " (default 4)"),
- llvm::cl::cat(clOptionsCategory));
-
namespace {
/// Loop unroll jam pass. Currently, this just unroll jams the first
/// outer loop in a Function.
struct LoopUnrollAndJam : public AffineLoopUnrollAndJamBase<LoopUnrollAndJam> {
- Optional<unsigned> unrollJamFactor;
- static const unsigned kDefaultUnrollJamFactor = 4;
-
- explicit LoopUnrollAndJam(Optional<unsigned> unrollJamFactor = None)
- : unrollJamFactor(unrollJamFactor) {}
+ explicit LoopUnrollAndJam(Optional<unsigned> unrollJamFactor = None) {
+ if (unrollJamFactor)
+ this->unrollJamFactor = *unrollJamFactor;
+ }
void runOnFunction() override;
- LogicalResult runOnAffineForOp(AffineForOp forOp);
};
} // end anonymous namespace
@@ -85,19 +74,5 @@ void LoopUnrollAndJam::runOnFunction() {
// any for operation.
auto &entryBlock = getFunction().front();
if (auto forOp = dyn_cast<AffineForOp>(entryBlock.front()))
- runOnAffineForOp(forOp);
-}
-
-/// Unroll and jam a 'affine.for' op. Default unroll jam factor is
-/// kDefaultUnrollJamFactor. Return failure if nothing was done.
-LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) {
- // Unroll and jam by the factor that was passed if any.
- if (unrollJamFactor.hasValue())
- return loopUnrollJamByFactor(forOp, unrollJamFactor.getValue());
- // Otherwise, unroll jam by the command-line factor if one was specified.
- if (clUnrollJamFactor.getNumOccurrences() > 0)
- return loopUnrollJamByFactor(forOp, clUnrollJamFactor);
-
- // Unroll and jam by four otherwise.
- return loopUnrollJamByFactor(forOp, kDefaultUnrollJamFactor);
+ loopUnrollJamByFactor(forOp, unrollJamFactor);
}
diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
index d6b2bd11f931..620b200333ee 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -582,7 +582,7 @@ struct Vectorize : public AffineVectorizeBase<Vectorize> {
} // end anonymous namespace
Vectorize::Vectorize(ArrayRef<int64_t> virtualVectorSize) {
- vectorSizes->assign(virtualVectorSize.begin(), virtualVectorSize.end());
+ vectorSizes = virtualVectorSize;
}
/////// TODO(ntv): Hoist to a VectorizationStrategy.cpp when appropriate.
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index cda2d0860e81..997895b6a869 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -508,9 +508,7 @@ static void tileLinalgOps(FuncOp f, ArrayRef<int64_t> tileSizes) {
namespace {
struct LinalgTilingPass : public LinalgTilingBase<LinalgTilingPass> {
LinalgTilingPass() = default;
- LinalgTilingPass(ArrayRef<int64_t> sizes) {
- tileSizes->assign(sizes.begin(), sizes.end());
- }
+ LinalgTilingPass(ArrayRef<int64_t> sizes) { tileSizes = sizes; }
void runOnFunction() override {
tileLinalgOps<loop::ForOp>(getFunction(), tileSizes);
@@ -521,7 +519,7 @@ struct LinalgTilingToParallelLoopsPass
: public LinalgTilingToParallelLoopsBase<LinalgTilingToParallelLoopsPass> {
LinalgTilingToParallelLoopsPass() = default;
LinalgTilingToParallelLoopsPass(ArrayRef<int64_t> sizes) {
- tileSizes->assign(sizes.begin(), sizes.end());
+ tileSizes = sizes;
}
void runOnFunction() override {
diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index 498c23a8a3e5..beaf4c0792fb 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -146,6 +146,11 @@ bool OpPrintingFlags::shouldElideElementsAttr(ElementsAttr attr) const {
*elementsAttrElementLimit < int64_t(attr.getNumElements());
}
+/// Return the size limit for printing large ElementsAttr.
+Optional<int64_t> OpPrintingFlags::getLargeElementsAttrLimit() const {
+ return elementsAttrElementLimit;
+}
+
/// Return if debug information should be printed.
bool OpPrintingFlags::shouldPrintDebugInfo() const {
return printDebugInfoFlag;
diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp
index 582f720a4841..8ee4996bd03f 100644
--- a/mlir/lib/Transforms/Inliner.cpp
+++ b/mlir/lib/Transforms/Inliner.cpp
@@ -27,16 +27,6 @@
using namespace mlir;
-static llvm::cl::opt<bool> disableCanonicalization(
- "mlir-disable-inline-simplify",
- llvm::cl::desc("Disable running simplifications during inlining"),
- llvm::cl::ReallyHidden, llvm::cl::init(false));
-
-static llvm::cl::opt<unsigned> maxInliningIterations(
- "mlir-max-inline-iterations",
- llvm::cl::desc("Maximum number of iterations when inlining within an SCC"),
- llvm::cl::ReallyHidden, llvm::cl::init(4));
-
//===----------------------------------------------------------------------===//
// Symbol Use Tracking
//===----------------------------------------------------------------------===//
@@ -563,13 +553,55 @@ static void canonicalizeSCC(CallGraph &cg, CGUseList &useList,
useList.recomputeUses(node, cg);
}
-/// Attempt to inline calls within the given scc, and run canonicalizations with
-/// the given patterns, until a fixed point is reached. This allows for the
-/// inlining of newly devirtualized calls.
-static void inlineSCC(Inliner &inliner, CGUseList &useList,
- MutableArrayRef<CallGraphNode *> currentSCC,
- MLIRContext *context,
- const OwningRewritePatternList &canonPatterns) {
+//===----------------------------------------------------------------------===//
+// InlinerPass
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct InlinerPass : public InlinerBase<InlinerPass> {
+ void runOnOperation() override;
+
+ /// Attempt to inline calls within the given scc, and run canonicalizations
+ /// with the given patterns, until a fixed point is reached. This allows for
+ /// the inlining of newly devirtualized calls.
+ void inlineSCC(Inliner &inliner, CGUseList &useList,
+ MutableArrayRef<CallGraphNode *> currentSCC,
+ MLIRContext *context,
+ const OwningRewritePatternList &canonPatterns);
+};
+} // end anonymous namespace
+
+void InlinerPass::runOnOperation() {
+ CallGraph &cg = getAnalysis<CallGraph>();
+ auto *context = &getContext();
+
+ // The inliner should only be run on operations that define a symbol table,
+ // as the callgraph will need to resolve references.
+ Operation *op = getOperation();
+ if (!op->hasTrait<OpTrait::SymbolTable>()) {
+ op->emitOpError() << " was scheduled to run under the inliner, but does "
+ "not define a symbol table";
+ return signalPassFailure();
+ }
+
+ // Collect a set of canonicalization patterns to use when simplifying
+ // callable regions within an SCC.
+ OwningRewritePatternList canonPatterns;
+ for (auto *op : context->getRegisteredOperations())
+ op->getCanonicalizationPatterns(canonPatterns, context);
+
+ // Run the inline transform in post-order over the SCCs in the callgraph.
+ Inliner inliner(context, cg);
+ CGUseList useList(getOperation(), cg);
+ runTransformOnCGSCCs(cg, [&](MutableArrayRef<CallGraphNode *> scc) {
+ inlineSCC(inliner, useList, scc, context, canonPatterns);
+ });
+}
+
+void InlinerPass::inlineSCC(Inliner &inliner, CGUseList &useList,
+ MutableArrayRef<CallGraphNode *> currentSCC,
+ MLIRContext *context,
+ const OwningRewritePatternList &canonPatterns) {
// If we successfully inlined any calls, run some simplifications on the
// nodes of the scc. Continue attempting to inline until we reach a fixed
// point, or a maximum iteration count. We canonicalize here as it may
@@ -584,41 +616,6 @@ static void inlineSCC(Inliner &inliner, CGUseList &useList,
}
}
-//===----------------------------------------------------------------------===//
-// InlinerPass
-//===----------------------------------------------------------------------===//
-
-namespace {
-struct InlinerPass : public InlinerBase<InlinerPass> {
- void runOnOperation() override {
- CallGraph &cg = getAnalysis<CallGraph>();
- auto *context = &getContext();
-
- // The inliner should only be run on operations that define a symbol table,
- // as the callgraph will need to resolve references.
- Operation *op = getOperation();
- if (!op->hasTrait<OpTrait::SymbolTable>()) {
- op->emitOpError() << " was scheduled to run under the inliner, but does "
- "not define a symbol table";
- return signalPassFailure();
- }
-
- // Collect a set of canonicalization patterns to use when simplifying
- // callable regions within an SCC.
- OwningRewritePatternList canonPatterns;
- for (auto *op : context->getRegisteredOperations())
- op->getCanonicalizationPatterns(canonPatterns, context);
-
- // Run the inline transform in post-order over the SCCs in the callgraph.
- Inliner inliner(context, cg);
- CGUseList useList(getOperation(), cg);
- runTransformOnCGSCCs(cg, [&](MutableArrayRef<CallGraphNode *> scc) {
- inlineSCC(inliner, useList, scc, context, canonPatterns);
- });
- }
-};
-} // end anonymous namespace
-
std::unique_ptr<Pass> mlir::createInlinerPass() {
return std::make_unique<InlinerPass>();
}
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp
index 47ee502b41fb..1486f2c446b2 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Transforms/LoopFusion.cpp
@@ -37,36 +37,6 @@ using llvm::SetVector;
using namespace mlir;
-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-/// Disables fusion profitability check and fuses if valid. Ignore any
-/// additional (redundant) computation tolerance threshold
-/// that would have prevented fusion.
-static llvm::cl::opt<bool>
- clMaximalLoopFusion("fusion-maximal",
- llvm::cl::desc("Enables maximal loop fusion"),
- llvm::cl::cat(clOptionsCategory));
-
-/// A threshold in percent of additional computation allowed when fusing.
-static llvm::cl::opt<double> clFusionAddlComputeTolerance(
- "fusion-compute-tolerance",
- llvm::cl::desc("Fractional increase in additional "
- "computation tolerated while fusing"),
- llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<unsigned> clFusionFastMemorySpace(
- "fusion-fast-mem-space",
- llvm::cl::desc("Faster memory space number to promote fusion buffers to"),
- llvm::cl::cat(clOptionsCategory));
-
-// A local buffer of size less than or equal to this size is automatically
-// promoted to fast memory after producer-consumer fusion.
-static llvm::cl::opt<unsigned long long> clFusionLocalBufThreshold(
- "fusion-local-buf-threshold",
- llvm::cl::desc("Threshold size (KiB) for promoting local buffers to fast "
- "memory space"),
- llvm::cl::cat(clOptionsCategory));
-
namespace {
/// Loop fusion pass. This pass currently supports a greedy fusion policy,
/// which fuses loop nests with single-writer/single-reader memref dependences
@@ -78,24 +48,15 @@ namespace {
// and add support for more general loop fusion algorithms.
struct LoopFusion : public AffineLoopFusionBase<LoopFusion> {
- LoopFusion(unsigned fastMemorySpace = 0, uint64_t localBufSizeThreshold = 0,
- bool maximalFusion = false)
- : localBufSizeThreshold(localBufSizeThreshold),
- fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion) {}
+ LoopFusion() = default;
+ LoopFusion(unsigned fastMemorySpace, uint64_t localBufSizeThresholdBytes,
+ bool maximalFusion) {
+ this->fastMemorySpace = fastMemorySpace;
+ this->localBufSizeThreshold = localBufSizeThresholdBytes / 1024;
+ this->maximalFusion = maximalFusion;
+ }
void runOnFunction() override;
-
- // Any local buffers smaller than this size (in bytes) will be created in
- // `fastMemorySpace` if provided.
- uint64_t localBufSizeThreshold;
- Optional<unsigned> fastMemorySpace = None;
- // If true, ignore any additional (redundant) computation tolerance threshold
- // that would have prevented fusion.
- bool maximalFusion;
-
- // The amount of additional computation that is tolerated while fusing
- // pair-wise as a fraction of the total computation.
- constexpr static double kComputeToleranceThreshold = 0.30f;
};
} // end anonymous namespace
@@ -1098,7 +1059,8 @@ static bool isFusionProfitable(Operation *srcOpInst, Operation *srcStoreOpInst,
ArrayRef<Operation *> dstLoadOpInsts,
ArrayRef<Operation *> dstStoreOpInsts,
ComputationSliceState *sliceState,
- unsigned *dstLoopDepth, bool maximalFusion) {
+ unsigned *dstLoopDepth, bool maximalFusion,
+ double computeToleranceThreshold) {
LLVM_DEBUG({
llvm::dbgs() << "Checking whether fusion is profitable between:\n";
llvm::dbgs() << " " << *srcOpInst << " and \n";
@@ -1247,11 +1209,6 @@ static bool isFusionProfitable(Operation *srcOpInst, Operation *srcStoreOpInst,
llvm::dbgs() << msg.str();
});
- double computeToleranceThreshold =
- clFusionAddlComputeTolerance.getNumOccurrences() > 0
- ? clFusionAddlComputeTolerance
- : LoopFusion::kComputeToleranceThreshold;
-
// TODO(b/123247369): This is a placeholder cost model.
// Among all choices that add an acceptable amount of redundant computation
// (as per computeToleranceThreshold), we will simply pick the one that
@@ -1426,13 +1383,18 @@ struct GreedyFusion {
// If true, ignore any additional (redundant) computation tolerance threshold
// that would have prevented fusion.
bool maximalFusion;
+ // The amount of additional computation that is tolerated while fusing
+ // pair-wise as a fraction of the total computation.
+ double computeToleranceThreshold;
using Node = MemRefDependenceGraph::Node;
GreedyFusion(MemRefDependenceGraph *mdg, unsigned localBufSizeThreshold,
- Optional<unsigned> fastMemorySpace, bool maximalFusion)
+ Optional<unsigned> fastMemorySpace, bool maximalFusion,
+ double computeToleranceThreshold)
: mdg(mdg), localBufSizeThreshold(localBufSizeThreshold),
- fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion) {}
+ fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion),
+ computeToleranceThreshold(computeToleranceThreshold) {}
// Initializes 'worklist' with nodes from 'mdg'
void init() {
@@ -1608,7 +1570,8 @@ struct GreedyFusion {
// Check if fusion would be profitable.
if (!isFusionProfitable(srcStoreOp, srcStoreOp, dstLoadOpInsts,
dstStoreOpInsts, &sliceState,
- &bestDstLoopDepth, maximalFusion))
+ &bestDstLoopDepth, maximalFusion,
+ computeToleranceThreshold))
continue;
// Fuse computation slice of 'srcLoopNest' into 'dstLoopNest'.
@@ -1769,7 +1732,7 @@ struct GreedyFusion {
// Check if fusion would be profitable.
if (!isFusionProfitable(sibLoadOpInst, sibStoreOpInst, dstLoadOpInsts,
dstStoreOpInsts, &sliceState, &bestDstLoopDepth,
- maximalFusion))
+ maximalFusion, computeToleranceThreshold))
continue;
// Fuse computation slice of 'sibLoopNest' into 'dstLoopNest'.
@@ -1954,21 +1917,15 @@ struct GreedyFusion {
} // end anonymous namespace
void LoopFusion::runOnFunction() {
- // Override if a command line argument was provided.
- if (clFusionFastMemorySpace.getNumOccurrences() > 0) {
- fastMemorySpace = clFusionFastMemorySpace.getValue();
- }
-
- // Override if a command line argument was provided.
- if (clFusionLocalBufThreshold.getNumOccurrences() > 0) {
- localBufSizeThreshold = clFusionLocalBufThreshold * 1024;
- }
-
- if (clMaximalLoopFusion.getNumOccurrences() > 0)
- maximalFusion = clMaximalLoopFusion;
-
MemRefDependenceGraph g;
- if (g.init(getFunction()))
- GreedyFusion(&g, localBufSizeThreshold, fastMemorySpace, maximalFusion)
- .run();
+ if (!g.init(getFunction()))
+ return;
+
+ Optional<unsigned> fastMemorySpaceOpt;
+ if (fastMemorySpace.hasValue())
+ fastMemorySpaceOpt = fastMemorySpace;
+ unsigned localBufSizeThresholdBytes = localBufSizeThreshold * 1024;
+ GreedyFusion fusion(&g, localBufSizeThresholdBytes, fastMemorySpaceOpt,
+ maximalFusion, computeToleranceThreshold);
+ fusion.run();
}
diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
index 79b3db6433d0..258df35d9b4d 100644
--- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
+++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
@@ -23,13 +23,10 @@ using namespace mlir;
#define DEBUG_TYPE "pattern-matcher"
-static llvm::cl::opt<unsigned> maxPatternMatchIterations(
- "mlir-max-pattern-match-iterations",
- llvm::cl::desc("Max number of iterations scanning for pattern match"),
- llvm::cl::init(10));
+/// The max number of iterations scanning for pattern match.
+static unsigned maxPatternMatchIterations = 10;
namespace {
-
/// This is a worklist-driven driver for the PatternMatcher, which repeatedly
/// applies the locally optimal patterns in a roughly "bottom up" way.
class GreedyPatternRewriteDriver : public PatternRewriter {
diff --git a/mlir/lib/Transforms/ViewOpGraph.cpp b/mlir/lib/Transforms/ViewOpGraph.cpp
index 41e33e8a178c..6ead3634c5be 100644
--- a/mlir/lib/Transforms/ViewOpGraph.cpp
+++ b/mlir/lib/Transforms/ViewOpGraph.cpp
@@ -14,13 +14,16 @@
#include "mlir/Support/STLExtras.h"
#include "llvm/Support/CommandLine.h"
-static llvm::cl::opt<int> elideIfLarger(
- "print-op-graph-elide-if-larger",
- llvm::cl::desc("Upper limit to emit elements attribute rather than elide"),
- llvm::cl::init(16));
-
using namespace mlir;
+/// Return the size limits for eliding large attributes.
+static int64_t getLargeAttributeSizeLimit() {
+ // Use the default from the printer flags if possible.
+ if (Optional<int64_t> limit = OpPrintingFlags().getLargeElementsAttrLimit())
+ return *limit;
+ return 16;
+}
+
namespace llvm {
// Specialize GraphTraits to treat Block as a graph of Operations as nodes and
@@ -65,6 +68,8 @@ std::string DOTGraphTraits<Block *>::getNodeLabel(Operation *op, Block *b) {
interleaveComma(op->getResultTypes(), os);
os << "\n";
+ // A value used to elide large container attribute.
+ int64_t largeAttrLimit = getLargeAttributeSizeLimit();
for (auto attr : op->getAttrs()) {
os << '\n' << attr.first << ": ";
// Always emit splat attributes.
@@ -75,7 +80,7 @@ std::string DOTGraphTraits<Block *>::getNodeLabel(Operation *op, Block *b) {
// Elide "big" elements attributes.
auto elements = attr.second.dyn_cast<ElementsAttr>();
- if (elements && elements.getNumElements() > elideIfLarger) {
+ if (elements && elements.getNumElements() > largeAttrLimit) {
os << std::string(elements.getType().getRank(), '[') << "..."
<< std::string(elements.getType().getRank(), ']') << " : "
<< elements.getType();
@@ -83,7 +88,7 @@ std::string DOTGraphTraits<Block *>::getNodeLabel(Operation *op, Block *b) {
}
auto array = attr.second.dyn_cast<ArrayAttr>();
- if (array && static_cast<int64_t>(array.size()) > elideIfLarger) {
+ if (array && static_cast<int64_t>(array.size()) > largeAttrLimit) {
os << "[...]";
continue;
}
diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir
index de8889e4510f..52c60d7177f8 100644
--- a/mlir/test/Dialect/Affine/affine-data-copy.mlir
+++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-skip-non-unit-stride-loops | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate="generate-dma=false fast-mem-space=0 skip-non-unit-stride-loops" | FileCheck %s
// Small buffer size to trigger fine copies.
-// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma=false -affine-data-copy-generate-fast-mem-space=0 -affine-data-copy-generate-fast-mem-capacity=1 | FileCheck --check-prefix=CHECK-SMALL %s
+// RUN: mlir-opt %s -split-input-file -affine-data-copy-generate="generate-dma=false fast-mem-space=0 fast-mem-capacity=1" | FileCheck --check-prefix=CHECK-SMALL %s
// Test affine data copy with a memref filter. We use a test pass that invokes
// affine data copy utility on the input loop nest.
diff --git a/mlir/test/Dialect/Affine/dma-generate.mlir b/mlir/test/Dialect/Affine/dma-generate.mlir
index 6afbb163aed4..3572b0a12e20 100644
--- a/mlir/test/Dialect/Affine/dma-generate.mlir
+++ b/mlir/test/Dialect/Affine/dma-generate.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma -affine-data-copy-generate-fast-mem-space=2 -affine-data-copy-generate-skip-non-unit-stride-loops -verify-diagnostics | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate -affine-data-copy-generate-dma -affine-data-copy-generate-fast-mem-capacity=16 -affine-data-copy-generate-fast-mem-space=2 | FileCheck %s --check-prefix FAST-MEM-16KB
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-space=2 skip-non-unit-stride-loops" -verify-diagnostics | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-data-copy-generate="generate-dma fast-mem-capacity=16 fast-mem-space=2" | FileCheck %s --check-prefix FAST-MEM-16KB
// We run most test cases with -copy-skip-non-unit-stride-loops to allow testing
// DMA generation at inner levels easily - since the DMA generation would
diff --git a/mlir/test/Dialect/Affine/inlining.mlir b/mlir/test/Dialect/Affine/inlining.mlir
index a83ef37a1d55..e65ae5d0b73a 100644
--- a/mlir/test/Dialect/Affine/inlining.mlir
+++ b/mlir/test/Dialect/Affine/inlining.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -inline -mlir-disable-inline-simplify | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -inline="disable-simplify" | FileCheck %s
// Basic test that functions within affine operations are inlined.
func @func_with_affine_ops(%N: index) {
diff --git a/mlir/test/Dialect/Affine/loop-tiling.mlir b/mlir/test/Dialect/Affine/loop-tiling.mlir
index 110233aa23a2..075ab0c230ac 100644
--- a/mlir/test/Dialect/Affine/loop-tiling.mlir
+++ b/mlir/test/Dialect/Affine/loop-tiling.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-size=32 | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-cache-size=512 | FileCheck %s --check-prefix=MODEL
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile -affine-tile-size=32 -affine-tile-separate | FileCheck %s --check-prefix=SEPARATE
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="tile-size=32" | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="cache-size=512" | FileCheck %s --check-prefix=MODEL
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-loop-tile="tile-size=32 separate" | FileCheck %s --check-prefix=SEPARATE
// -----
diff --git a/mlir/test/Dialect/Affine/unroll-jam.mlir b/mlir/test/Dialect/Affine/unroll-jam.mlir
index 9d23f7a6aa67..b1513be7b937 100644
--- a/mlir/test/Dialect/Affine/unroll-jam.mlir
+++ b/mlir/test/Dialect/Affine/unroll-jam.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam -unroll-jam-factor=2 | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam -unroll-jam-factor=4 | FileCheck --check-prefix=UJAM-FOUR %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=2" | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=4" | FileCheck --check-prefix=UJAM-FOUR %s
// CHECK-DAG: [[MAP_PLUS_1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
// CHECK-DAG: [[MAP_DIV_OFFSET:#map[0-9]+]] = affine_map<()[s0] -> (((s0 - 1) floordiv 2) * 2 + 1)>
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index 2681fdc2ce1d..cd9fb63f1585 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-full | FileCheck %s --check-prefix UNROLL-FULL
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-full -unroll-full-threshold=2 | FileCheck %s --check-prefix SHORT
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-factor=4 | FileCheck %s --check-prefix UNROLL-BY-4
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll -unroll-factor=1 | FileCheck %s --check-prefix UNROLL-BY-1
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full" | FileCheck %s --check-prefix UNROLL-FULL
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1
// UNROLL-FULL-DAG: [[MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
// UNROLL-FULL-DAG: [[MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)>
diff --git a/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir b/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir
index fc188c3938a8..24b22c5dbf6f 100644
--- a/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir
+++ b/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -pass-pipeline='spv.module(inline)' -mlir-disable-inline-simplify | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -pass-pipeline='spv.module(inline{disable-simplify})' | FileCheck %s
spv.module Logical GLSL450 {
spv.func @callee() "None" {
diff --git a/mlir/test/Transforms/inlining.mlir b/mlir/test/Transforms/inlining.mlir
index 980b3c380ae3..d62400fcb2de 100644
--- a/mlir/test/Transforms/inlining.mlir
+++ b/mlir/test/Transforms/inlining.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify | FileCheck %s
-// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify -mlir-print-debuginfo | FileCheck %s --check-prefix INLINE-LOC
-// RUN: mlir-opt %s -inline -mlir-disable-inline-simplify=false | FileCheck %s --check-prefix INLINE_SIMPLIFY
+// RUN: mlir-opt %s -inline="disable-simplify" | FileCheck %s
+// RUN: mlir-opt %s -inline="disable-simplify" -mlir-print-debuginfo | FileCheck %s --check-prefix INLINE-LOC
+// RUN: mlir-opt %s -inline | FileCheck %s --check-prefix INLINE_SIMPLIFY
// Inline a function that takes an argument.
func @func_with_arg(%c : i32) -> i32 {
diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir
index d1e1587ea2fe..d19aa5e5558b 100644
--- a/mlir/test/Transforms/loop-fusion.mlir
+++ b/mlir/test/Transforms/loop-fusion.mlir
@@ -1,5 +1,5 @@
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion -split-input-file | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion -fusion-maximal -split-input-file | FileCheck %s --check-prefix=MAXIMAL
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-fusion="fusion-maximal" -split-input-file | FileCheck %s --check-prefix=MAXIMAL
// TODO(andydavis) Add more tests:
// *) Add nested fusion test cases when non-constant loop bound support is
diff --git a/mlir/test/lib/Pass/TestPassManager.cpp b/mlir/test/lib/Pass/TestPassManager.cpp
index ffac1b18be46..f6ac0dee4373 100644
--- a/mlir/test/lib/Pass/TestPassManager.cpp
+++ b/mlir/test/lib/Pass/TestPassManager.cpp
@@ -35,10 +35,9 @@ class TestOptionsPass : public PassWrapper<TestOptionsPass, FunctionPass> {
TestOptionsPass() = default;
TestOptionsPass(const TestOptionsPass &) {}
TestOptionsPass(const Options &options) {
- listOption->assign(options.listOption.begin(), options.listOption.end());
- stringOption.setValue(options.stringOption);
- stringListOption->assign(options.stringListOption.begin(),
- options.stringListOption.end());
+ listOption = options.listOption;
+ stringOption = options.stringOption;
+ stringListOption = options.stringListOption;
}
void runOnFunction() final {}
More information about the llvm-commits mailing list