[Mlir-commits] [mlir] [mlir][vector] Add mask elimination transform (PR #99314)
Mehdi Amini
llvmlistbot at llvm.org
Thu Jul 18 02:16:53 PDT 2024
================
@@ -0,0 +1,117 @@
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.h"
+#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
+#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
+#include "mlir/Interfaces/FunctionInterfaces.h"
+
+using namespace mlir;
+using namespace mlir::vector;
+namespace {
+
+/// Recognises values of the form `c * vector.vscale`.
+///
+/// Returns 1 when `value` is produced directly by a `vector.vscale` op. When
+/// `value` is produced by an `arith.muli` with a `vector.vscale` operand, the
+/// result of `getConstantIntValue()` on the other operand is returned (which
+/// is empty if that operand is not a constant). In every other case an empty
+/// optional is returned.
+std::optional<int64_t> getConstantVscaleMultiplier(Value value) {
+  // Plain `vector.vscale`: the multiplier is one.
+  if (value.getDefiningOp<vector::VectorScaleOp>())
+    return 1;
+
+  if (auto mulOp = value.getDefiningOp<arith::MulIOp>()) {
+    Value left = mulOp.getLhs();
+    Value right = mulOp.getRhs();
+    // The left operand is inspected first; if it is vscale the right operand
+    // alone decides the result, even if it turns out not to be a constant.
+    if (left.getDefiningOp<vector::VectorScaleOp>())
+      return getConstantIntValue(right);
+    if (right.getDefiningOp<vector::VectorScaleOp>())
+      return getConstantIntValue(left);
+  }
+
+  // Not a (recognised) multiple of vscale.
+  return std::nullopt;
+}
+
+/// Tries to prove that a (scalable) `vector.create_mask` is all-true and, if
+/// so, redirects all of its uses to an all-true constant of the same type.
+/// All-true masks can subsequently be removed by simple folds.
+///
+/// \param rewriter     Used to create the constant (at its current insertion
+///                     point) and to replace the uses of `createMaskOp`.
+/// \param createMaskOp The mask-producing op to analyse.
+/// \param vscaleRange  Min/max vscale fed to the scalable bound computation.
+/// \returns success() if the mask was replaced, failure() if any dimension
+///          could not be shown to be all-true (the IR is left untouched).
+LogicalResult resolveAllTrueCreateMaskOp(IRRewriter &rewriter,
+                                         vector::CreateMaskOp createMaskOp,
+                                         VscaleRange vscaleRange) {
+  auto maskType = createMaskOp.getVectorType();
+  auto scalableFlags = maskType.getScalableDims();
+  auto staticSizes = maskType.getShape();
+
+  // A mask operand that is neither a constant nor a constant multiple of
+  // vscale, together with the index of the mask dimension it controls.
+  struct PendingDim {
+    size_t index;
+    Value operand;
+  };
+
+  // First pass: cheap pattern matching only. Bail out as soon as a dim is
+  // known not to be all-true, so the costlier bounds analysis below is only
+  // reached when it can still matter.
+  SmallVector<PendingDim> pending;
+  for (auto [dim, operand] : llvm::enumerate(createMaskOp.getOperands())) {
+    if (auto constantSize = getConstantIntValue(operand)) {
+      // A constant operand is rejected for a scalable dim, and must be at
+      // least the static size for a fixed-size dim.
+      if (scalableFlags[dim] || *constantSize < staticSizes[dim])
+        return failure();
+      continue;
+    }
+    if (auto multiplier = getConstantVscaleMultiplier(operand)) {
+      // `multiplier * vscale` must be at least as large as this dim.
+      if (*multiplier < staticSizes[dim])
+        return failure();
+      continue;
+    }
+    // Nothing can be said yet; defer to the value bounds analysis.
+    pending.push_back(PendingDim{dim, operand});
+  }
+
+  // Second pass: for each deferred operand, compute a lower bound (i.e. the
+  // smallest value it could take) and compare it with the mask dim size.
+  for (auto [dim, operand] : pending) {
+    auto lowerBound =
+        vector::ScalableValueBoundsConstraintSet::computeScalableBound(
+            operand, {}, vscaleRange.vscaleMin, vscaleRange.vscaleMax,
+            presburger::BoundType::LB);
+    if (failed(lowerBound))
+      return failure();
+    auto lowerBoundSize = lowerBound->getSize();
+    if (failed(lowerBoundSize))
+      return failure();
+    // A constant (non-scalable) lower bound is only accepted for a
+    // _fixed-size_ mask dim.
+    if (!lowerBoundSize->scalable && scalableFlags[dim])
+      return failure();
+    // Whether scalable or constant, the bound must be >= the mask dim size
+    // for this dim to be all-true.
+    if (lowerBoundSize->baseSize < staticSizes[dim])
+      return failure();
+  }
+
+  // Every dim is all-true: swap in a constant mask. This should result in the
+  // mask being removed in most cases (as xfer ops + vector.mask have folds to
+  // remove all-true masks).
+  auto allTrueAttr = DenseElementsAttr::get(maskType, true);
+  auto allTrueMask = rewriter.create<arith::ConstantOp>(
+      createMaskOp.getLoc(), maskType, allTrueAttr);
+  rewriter.replaceAllUsesWith(createMaskOp, allTrueMask);
+  return success();
+}
+
+} // namespace
+
+namespace mlir::vector {
+
+void eliminateVectorMasks(IRRewriter &rewriter, FunctionOpInterface function,
+ std::optional<VscaleRange> vscaleRange) {
+ // TODO: Support fixed-size case. This is less likely to be useful as for
+ // fixed-size code dimensions are all static so masks tend to fold away.
+ if (!vscaleRange)
+ return;
+
+ OpBuilder::InsertionGuard g(rewriter);
+ SmallVector<vector::CreateMaskOp> worklist;
+ function.walk([&](vector::CreateMaskOp createMaskOp) {
+ worklist.push_back(createMaskOp);
+ });
+ rewriter.setInsertionPointToStart(&function.front());
+ for (auto mask : worklist)
+ (void)resolveAllTrueCreateMaskOp(rewriter, mask, *vscaleRange);
----------------
joker-eph wrote:
This statement, as a generality, does not seem correct: a lot of mutations are perfectly fine to do while walking the IR; it all depends on what you do.
I would say that if you need a worklist, you should document why the delayed modification is needed.
https://github.com/llvm/llvm-project/pull/99314
More information about the Mlir-commits
mailing list