[Mlir-commits] [mlir] [mlir] Bump SmallVector sizes along hot paths (PR #188827)
Jakub Kuderski
llvmlistbot at llvm.org
Thu Mar 26 12:53:49 PDT 2026
https://github.com/kuhar updated https://github.com/llvm/llvm-project/pull/188827
>From d8c2bd7f26d9cdfe2831709900b0325f23dcabe6 Mon Sep 17 00:00:00 2001
From: Jakub Kuderski <jakub at nod-labs.com>
Date: Thu, 26 Mar 2026 15:07:28 -0400
Subject: [PATCH 1/2] [mlir] Bump SmallVector sizes along hot paths
This is based on empirical data from compiling 9 medium to large
language and diffusion models with IREE. e2e, this improves compilation
times by 0.33% in terms of `instructions:u` (same metric is used by the
[CTMark for Clang](https://www.npopov.com/2024/01/01/This-year-in-LLVM-2023.html#compile-time-improvements)).
I explored using other constants and these are the ones that performed
best while keeping the sizes relatively small.
---
mlir/include/mlir/IR/AffineMap.h | 2 +-
.../include/mlir/Interfaces/ValueBoundsOpInterface.h | 8 ++++++--
mlir/lib/Analysis/FlatLinearValueConstraints.cpp | 4 +++-
.../Arith/Transforms/IntRangeOptimizations.cpp | 4 +++-
mlir/lib/IR/AffineMap.cpp | 12 ++++++++----
mlir/lib/Interfaces/IndexingMapOpInterface.cpp | 4 +++-
mlir/lib/Interfaces/InferIntRangeInterface.cpp | 4 +++-
mlir/lib/Interfaces/ValueBoundsOpInterface.cpp | 8 ++++++--
mlir/lib/Transforms/Utils/DialectConversion.cpp | 7 ++++---
9 files changed, 37 insertions(+), 16 deletions(-)
diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h
index 4bc40a7d4091a..de10b99771da1 100644
--- a/mlir/include/mlir/IR/AffineMap.h
+++ b/mlir/include/mlir/IR/AffineMap.h
@@ -352,7 +352,7 @@ class AffineMap {
/// Applies composition by the dims of `this` to the integer `values` and
/// returns the resulting values. `this` must be symbol-less.
- SmallVector<int64_t, 4> compose(ArrayRef<int64_t> values) const;
+ SmallVector<int64_t, 8> compose(ArrayRef<int64_t> values) const;
/// Returns the number of "zero" results (constant values == 0) in this map.
///
diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
index 58852239444b9..0590cadca8f9e 100644
--- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
+++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
@@ -48,7 +48,9 @@ class HyperrectangularSlice {
SmallVector<OpFoldResult> mixedStrides;
};
-using ValueDimList = SmallVector<std::pair<Value, std::optional<int64_t>>>;
+// Inline size chosen empirically based on compilation profiling.
+// Profiled: 488K calls, avg=1.5+-0.5. N=2 covers >90% of cases inline.
+using ValueDimList = SmallVector<std::pair<Value, std::optional<int64_t>>, 2>;
/// A helper class to be used with `ValueBoundsOpInterface`. This class stores a
/// constraint system and mapping of constrained variables to index-typed
@@ -415,7 +417,9 @@ class ValueBoundsConstraintSet
void projectOutAnonymous(std::optional<int64_t> except = std::nullopt);
/// Mapping of columns to values/shape dimensions.
- SmallVector<std::optional<ValueDim>> positionToValueDim;
+ // Inline size chosen empirically based on compilation profiling.
+ // Profiled: 2.1M calls, avg=3.0+-1.9. N=4 covers ~70% of cases inline.
+ SmallVector<std::optional<ValueDim>, 4> positionToValueDim;
/// Reverse mapping of values/shape dimensions to columns.
DenseMap<ValueDim, int64_t> valueDimToPosition;
diff --git a/mlir/lib/Analysis/FlatLinearValueConstraints.cpp b/mlir/lib/Analysis/FlatLinearValueConstraints.cpp
index 0e0c5f2159382..8e8fcc347b720 100644
--- a/mlir/lib/Analysis/FlatLinearValueConstraints.cpp
+++ b/mlir/lib/Analysis/FlatLinearValueConstraints.cpp
@@ -835,7 +835,9 @@ LogicalResult FlatLinearConstraints::addBound(
// Add one (in)equality for each result.
for (const auto &flatExpr : flatExprs) {
- SmallVector<int64_t> ineq(getNumCols(), 0);
+ // Inline size chosen empirically based on compilation profiling.
+ // Profiled: 7.1M calls, avg=5.3+-3.0. N=8 covers 82% of cases inline.
+ SmallVector<int64_t, 8> ineq(getNumCols(), 0);
// Dims and symbols.
for (unsigned j = 0, e = boundMap.getNumInputs(); j < e; j++) {
ineq[j] = lower ? -flatExpr[j] : flatExpr[j];
diff --git a/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp b/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp
index 9f849ac4980d6..5813f3c3ea746 100644
--- a/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp
+++ b/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp
@@ -331,7 +331,9 @@ struct NarrowElementwise final : OpTraitRewritePattern<OpTrait::Elementwise> {
if (op->getNumResults() == 0)
return rewriter.notifyMatchFailure(op, "can't narrow resultless op");
- SmallVector<ConstantIntRanges> ranges;
+ // Inline size chosen empirically based on compilation profiling.
+ // Profiled: 2.6M calls, avg=1.7+-1.3. N=4 covers >95% of cases inline.
+ SmallVector<ConstantIntRanges, 4> ranges;
if (failed(collectRanges(solver, op->getOperands(), ranges)))
return rewriter.notifyMatchFailure(op, "input without specified range");
if (failed(collectRanges(solver, op->getResults(), ranges)))
diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp
index 7235b442aa567..446bc9a6f6f87 100644
--- a/mlir/lib/IR/AffineMap.cpp
+++ b/mlir/lib/IR/AffineMap.cpp
@@ -276,7 +276,9 @@ AffineMap AffineMap::getPermutationMap(ArrayRef<int64_t> permutation,
AffineMap AffineMap::getMultiDimMapWithTargets(unsigned numDims,
ArrayRef<unsigned> targets,
MLIRContext *context) {
- SmallVector<AffineExpr, 4> affExprs;
+ // Inline size chosen empirically based on compilation profiling.
+ // Profiled: 3.1M calls, avg=4.1+-3.7. N=8 covers ~86% of cases inline.
+ SmallVector<AffineExpr, 8> affExprs;
for (unsigned t : targets)
affExprs.push_back(getAffineDimExpr(t, context));
AffineMap result = AffineMap::get(/*dimCount=*/numDims, /*symbolCount=*/0,
@@ -573,13 +575,15 @@ AffineMap AffineMap::compose(AffineMap map) const {
return AffineMap::get(numDims, numSymbols, exprs, map.getContext());
}
-SmallVector<int64_t, 4> AffineMap::compose(ArrayRef<int64_t> values) const {
+// Inline size chosen empirically based on compilation profiling.
+// Profiled: 43.5M calls, avg=3.1+-2.3. N=8 covers ~98% of cases inline.
+SmallVector<int64_t, 8> AffineMap::compose(ArrayRef<int64_t> values) const {
assert(getNumSymbols() == 0 && "Expected symbol-less map");
- SmallVector<AffineExpr, 4> exprs;
+ SmallVector<AffineExpr, 8> exprs;
MLIRContext *ctx = getContext();
for (int64_t value : values)
exprs.push_back(getAffineConstantExpr(value, ctx));
- SmallVector<int64_t, 4> res;
+ SmallVector<int64_t, 8> res;
res.reserve(getNumResults());
for (auto e : getResults())
res.push_back(cast<AffineConstantExpr>(e.replaceDims(exprs)).getValue());
diff --git a/mlir/lib/Interfaces/IndexingMapOpInterface.cpp b/mlir/lib/Interfaces/IndexingMapOpInterface.cpp
index 665a164cb5ff1..41e9b438fa98f 100644
--- a/mlir/lib/Interfaces/IndexingMapOpInterface.cpp
+++ b/mlir/lib/Interfaces/IndexingMapOpInterface.cpp
@@ -56,7 +56,9 @@ LogicalResult mlir::IndexingMapOpInterface::verifyImpl() {
<< ") to be equal to the number of input/output operands ("
<< getOperation()->getNumOperands() << ")";
- SmallVector<int64_t> allShapesSizes;
+ // Inline size chosen empirically based on compilation profiling.
+ // Profiled: 7.5M calls, avg=5.9+-3.1. N=8 covers 67% of cases inline.
+ SmallVector<int64_t, 8> allShapesSizes;
for (OpOperand &opOperand : getOperation()->getOpOperands()) {
Type ty = opOperand.get().getType();
diff --git a/mlir/lib/Interfaces/InferIntRangeInterface.cpp b/mlir/lib/Interfaces/InferIntRangeInterface.cpp
index 84fc9b8b61a11..9d8e5f50a725b 100644
--- a/mlir/lib/Interfaces/InferIntRangeInterface.cpp
+++ b/mlir/lib/Interfaces/InferIntRangeInterface.cpp
@@ -168,7 +168,9 @@ mlir::getIntValueRanges(ArrayRef<OpFoldResult> values,
void mlir::intrange::detail::defaultInferResultRanges(
InferIntRangeInterface interface, ArrayRef<IntegerValueRange> argRanges,
SetIntLatticeFn setResultRanges) {
- llvm::SmallVector<ConstantIntRanges> unpacked;
+ // Inline size chosen empirically based on compilation profiling.
+ // Profiled: 1.6M calls, avg=1.2+-0.8. N=2 covers ~84% of cases inline.
+ llvm::SmallVector<ConstantIntRanges, 2> unpacked;
unpacked.reserve(argRanges.size());
for (const IntegerValueRange &range : argRanges) {
diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
index 35e3e92b91176..2cca60870eca0 100644
--- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
+++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
@@ -114,7 +114,9 @@ ValueBoundsConstraintSet::Variable::Variable(AffineMap map,
// Turn all dims into symbols.
Builder b(map.getContext());
- SmallVector<AffineExpr> dimReplacements, symReplacements;
+ // Inline size chosen empirically based on compilation profiling.
+ // Profiled: 490K calls, avg=1.5+-0.6. N=8 covers >99% of cases inline.
+ SmallVector<AffineExpr, 8> dimReplacements, symReplacements;
for (int64_t i = 0, e = map.getNumDims(); i < e; ++i)
dimReplacements.push_back(b.getAffineSymbolExpr(i));
for (int64_t i = 0, e = map.getNumSymbols(); i < e; ++i)
@@ -718,7 +720,9 @@ bool ValueBoundsConstraintSet::comparePos(int64_t lhsPos,
comparePos(lhsPos, ComparisonOperator::GE, rhsPos);
// Construct inequality.
- SmallVector<int64_t> eq(cstr.getNumCols(), 0);
+ // Inline size chosen empirically based on compilation profiling.
+ // Profiled: 3.2M calls, avg=4.0+-2.3. N=8 covers ~95% of cases inline.
+ SmallVector<int64_t, 8> eq(cstr.getNumCols(), 0);
if (cmp == LT || cmp == LE) {
++eq[lhsPos];
--eq[rhsPos];
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index cf59234ded233..71cb0438e3955 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -114,9 +114,10 @@ enum OpConversionMode {
// ConversionValueMapping
//===----------------------------------------------------------------------===//
-/// A vector of SSA values, optimized for the most common case of a single
-/// value.
-using ValueVector = SmallVector<Value, 1>;
+/// A vector of SSA values, optimized for the most common case of one or two
+/// values. Inline size chosen empirically based on compilation profiling.
+/// Profiled: 2.3M calls, avg=2.0+-0.3. N=2 covers 98% of cases inline.
+using ValueVector = SmallVector<Value, 2>;
namespace {
>From 3982f56e99f62dc16755b52417320fcdf3afa0da Mon Sep 17 00:00:00 2001
From: Jakub Kuderski <jakub at nod-labs.com>
Date: Thu, 26 Mar 2026 15:53:34 -0400
Subject: [PATCH 2/2] Fix one small size
---
mlir/lib/Interfaces/ValueBoundsOpInterface.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
index 2cca60870eca0..c40a0e0114752 100644
--- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
+++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
@@ -115,8 +115,8 @@ ValueBoundsConstraintSet::Variable::Variable(AffineMap map,
// Turn all dims into symbols.
Builder b(map.getContext());
// Inline size chosen empirically based on compilation profiling.
- // Profiled: 490K calls, avg=1.5+-0.6. N=8 covers >99% of cases inline.
- SmallVector<AffineExpr, 8> dimReplacements, symReplacements;
+ // Profiled: 490K calls, avg=1.5+-0.6. N=4 covers >99% of cases inline.
+ SmallVector<AffineExpr, 4> dimReplacements, symReplacements;
for (int64_t i = 0, e = map.getNumDims(); i < e; ++i)
dimReplacements.push_back(b.getAffineSymbolExpr(i));
for (int64_t i = 0, e = map.getNumSymbols(); i < e; ++i)
More information about the Mlir-commits
mailing list