[Mlir-commits] [mlir] 7625742 - [mlir][Linalg] Add support for tileAndDistribute on tensors.
Nicolas Vasilache
llvmlistbot at llvm.org
Mon Nov 16 03:16:08 PST 2020
Author: Nicolas Vasilache
Date: 2020-11-16T11:12:50Z
New Revision: 76257422378e54dc2b59ff034e2955e9518e6c99
URL: https://github.com/llvm/llvm-project/commit/76257422378e54dc2b59ff034e2955e9518e6c99
DIFF: https://github.com/llvm/llvm-project/commit/76257422378e54dc2b59ff034e2955e9518e6c99.diff
LOG: [mlir][Linalg] Add support for tileAndDistribute on tensors.
scf.parallel is currently not a good fit for tiling on tensors.
Instead, provide a path to parallelism directly through scf.for.
For now, this transformation ignores the distribution scheme and always performs a block-cyclic mapping (where the block size is the tile size).
Differential revision: https://reviews.llvm.org/D90475
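
The block-cyclic mapping mentioned above offsets each tiled loop's lower bound by procId * step and scales the step by nprocs, so processor procId executes tiles procId, procId + nprocs, procId + 2 * nprocs, and so on. The following is a minimal standalone C++ sketch of that arithmetic (illustrative only, not part of the patch; all names are local to the example):

    #include <cstdio>

    int main() {
      // One tiled loop: 4 tiles of size 8 over the range [0, 32).
      const int lb = 0, ub = 32, step = 8;
      const int nprocs = 2; // e.g. two GPU blocks along one dimension.
      for (int procId = 0; procId < nprocs; ++procId) {
        // Block-cyclic rewrite applied by the distribution:
        //   lb' = lb + procId * step,  step' = nprocs * step.
        for (int iv = lb + procId * step; iv < ub; iv += nprocs * step)
          std::printf("proc %d handles the tile starting at %d\n", procId, iv);
      }
      return 0;
    }

With nprocs = 2, processor 0 handles the tiles starting at 0 and 16 and processor 1 the tiles starting at 8 and 24, which is the schedule the gpu.block_id / gpu.grid_dim based test added below checks for.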
Added:
Modified:
mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
mlir/include/mlir/Dialect/SCF/SCF.h
mlir/lib/Dialect/Linalg/Utils/Utils.cpp
mlir/lib/Dialect/SCF/EDSC/Builders.cpp
mlir/lib/Dialect/SCF/SCF.cpp
mlir/test/Dialect/Linalg/tile-and-distribute.mlir
mlir/test/EDSC/builder-api-test.cpp
mlir/test/lib/Transforms/TestLinalgTransforms.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
index fe8df4c2d0e4..8622d8c98315 100644
--- a/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/SCF/EDSC/Builders.h
@@ -24,15 +24,15 @@ namespace edsc {
/// Adapters for building loop nests using the builder and the location stored
/// in ScopedContext. Actual builders are in scf::buildLoopNest.
-scf::ValueVector loopNestBuilder(ValueRange lbs, ValueRange ubs,
+scf::LoopNest loopNestBuilder(ValueRange lbs, ValueRange ubs,
ValueRange steps,
function_ref<void(ValueRange)> fun = nullptr);
-scf::ValueVector loopNestBuilder(Value lb, Value ub, Value step,
+scf::LoopNest loopNestBuilder(Value lb, Value ub, Value step,
function_ref<void(Value)> fun = nullptr);
-scf::ValueVector loopNestBuilder(
+scf::LoopNest loopNestBuilder(
Value lb, Value ub, Value step, ValueRange iterArgInitValues,
function_ref<scf::ValueVector(Value, ValueRange)> fun = nullptr);
-scf::ValueVector loopNestBuilder(
+scf::LoopNest loopNestBuilder(
ValueRange lbs, ValueRange ubs, ValueRange steps,
ValueRange iterArgInitValues,
function_ref<scf::ValueVector(ValueRange, ValueRange)> fun = nullptr);
diff --git a/mlir/include/mlir/Dialect/SCF/SCF.h b/mlir/include/mlir/Dialect/SCF/SCF.h
index 55c8cbf5fa74..619ebd2639e7 100644
--- a/mlir/include/mlir/Dialect/SCF/SCF.h
+++ b/mlir/include/mlir/Dialect/SCF/SCF.h
@@ -51,6 +51,11 @@ ParallelOp getParallelForInductionVarOwner(Value val);
/// An owning vector of values, handy to return from functions.
using ValueVector = std::vector<Value>;
+using LoopVector = std::vector<scf::ForOp>;
+struct LoopNest {
+ ResultRange getResults() { return loops.front().getResults(); }
+ LoopVector loops;
+};
/// Creates a perfect nest of "for" loops, i.e. all loops but the innermost
/// contain only another loop and a terminator. The lower, upper bounds and
@@ -65,11 +70,12 @@ using ValueVector = std::vector<Value>;
/// yielded from the loop body and forwarded back through the loop nest. If the
/// function is not provided, the loop nest is not expected to have iteration
/// arguments, the body of the innermost loop will be left empty, containing
-/// only the zero-operand terminator. Returns the values yielded by the
-/// outermost loop. If bound arrays are empty, the body builder will be called
+/// only the zero-operand terminator. Returns the LoopNest containing the list
+/// of perfectly nested scf::ForOp's built during the call.
+/// If bound arrays are empty, the body builder will be called
/// once to construct the IR outside of the loop with an empty list of induction
/// variables.
-ValueVector buildLoopNest(
+LoopNest buildLoopNest(
OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs,
ValueRange steps, ValueRange iterArgs,
function_ref<ValueVector(OpBuilder &, Location, ValueRange, ValueRange)>
@@ -78,7 +84,8 @@ ValueVector buildLoopNest(
/// A convenience version for building loop nests without iteration arguments
/// (like for reductions). Does not take the initial value of reductions or
/// expect the body building functions to return their current value.
-ValueVector buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs,
+/// The built nested scf::ForOp's are returned in the LoopNest.
+LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs,
ValueRange ubs, ValueRange steps,
function_ref<void(OpBuilder &, Location, ValueRange)>
bodyBuilder = nullptr);
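
For reference, a hedged usage sketch of the new buildLoopNest return type from the hunk above (not part of the patch; an OpBuilder `b`, a Location `loc`, index-typed Values `lb`, `ub`, `step` and an initial iter_arg `init` are assumed to be in scope):

    SmallVector<Value, 1> lbs{lb}, ubs{ub}, steps{step}, iterArgs{init};
    scf::LoopNest nest = scf::buildLoopNest(
        b, loc, lbs, ubs, steps, iterArgs,
        [](OpBuilder &nested, Location nestedLoc, ValueRange ivs,
           ValueRange args) {
          // Body builder: this sketch just forwards the iteration argument.
          return scf::ValueVector(args.begin(), args.end());
        });
    // The individual loops are now available for post-processing
    // (outermost first), e.g. for mapping to processor ids ...
    scf::ForOp outermost = nest.loops.front();
    // ... and the values the old API used to return are still reachable.
    ValueRange yielded = nest.getResults();

Callers that only need the yielded values, such as the EDSC test updated below, simply append .getResults() to the existing call.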
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 210d17516718..e5f0ba013e01 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -24,6 +24,7 @@
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/LoopUtils.h"
using namespace mlir;
using namespace mlir::linalg;
@@ -171,10 +172,27 @@ void GenerateLoopNest<scf::ForOp>::doit(
ArrayRef<Range> loopRanges, ValueRange iterArgInitValues,
ArrayRef<Attribute> iteratorTypes,
function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
- Optional<LinalgLoopDistributionOptions>) {
+ Optional<LinalgLoopDistributionOptions> distributionOptions) {
+  // Create procInfo so that it dominates the loops, if appropriate.
+ OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
+ Location loc = edsc::ScopedContext::getLocation();
+ SmallVector<ProcInfo, 2> procInfo;
+ if (distributionOptions.hasValue())
+ procInfo = distributionOptions->procInfo(builder, loc, ArrayRef<Range>{});
+
SmallVector<Value, 4> lbs, ubs, steps;
unpackRanges(loopRanges, lbs, ubs, steps);
- edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
+ LoopNest loopNest =
+ edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
+
+ if (!distributionOptions.hasValue() || loopNest.loops.empty())
+ return;
+
+ // TODO: support distributionMethod, which is currently ignored.
+ for (auto it : llvm::zip(loopNest.loops, procInfo,
+ distributionOptions->distributionMethod))
+ mapLoopToProcessorIds(std::get<0>(it), std::get<1>(it).procId,
+ std::get<1>(it).nprocs);
}
/// Specialization to build affine "for" nest.
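
To connect the two sides: the procInfo and distributionMethod consumed above come from the LinalgLoopDistributionOptions supplied by the caller. A hedged configuration sketch, mirroring the test pattern added below (getGpuProcIds is the helper defined in TestLinalgTransforms.cpp):

    LinalgLoopDistributionOptions distOptions;
    // One ProcInfo {procId, nprocs} pair per distributed loop; the helper
    // emits gpu.block_id / gpu.grid_dim pairs.
    distOptions.procInfo = getGpuProcIds<gpu::BlockIdOp, gpu::GridDimOp>;
    // One method per distributed loop. Per the TODO above the method is
    // currently ignored: the mapping is always block-cyclic with the tile
    // size as the block.
    distOptions.distributionMethod.resize(
        2, DistributionMethod::CyclicNumProcsEqNumIters);

    LinalgTilingOptions tilingOptions =
        LinalgTilingOptions()
            .setTileSizes({8, 8, 4})
            .setLoopType(LinalgTilingLoopType::Loops)
            .setDistributionOptions(distOptions);

Because the zip above pairs loops with procInfo and distributionMethod entries, only the first two (parallel) tiled loops are distributed; the third, reduction loop keeps its sequential scf.for.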
diff --git a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
index 45097186a248..d0ac5f0c3439 100644
--- a/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/SCF/EDSC/Builders.cpp
@@ -14,7 +14,7 @@
using namespace mlir;
using namespace mlir::edsc;
-mlir::scf::ValueVector
+mlir::scf::LoopNest
mlir::edsc::loopNestBuilder(ValueRange lbs, ValueRange ubs, ValueRange steps,
function_ref<void(ValueRange)> fun) {
// Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
@@ -29,7 +29,7 @@ mlir::edsc::loopNestBuilder(ValueRange lbs, ValueRange ubs, ValueRange steps,
});
}
-mlir::scf::ValueVector
+mlir::scf::LoopNest
mlir::edsc::loopNestBuilder(Value lb, Value ub, Value step,
function_ref<void(Value)> fun) {
// Delegates to the ValueRange-based version by wrapping the lambda.
@@ -42,7 +42,7 @@ mlir::edsc::loopNestBuilder(Value lb, Value ub, Value step,
wrapper);
}
-mlir::scf::ValueVector mlir::edsc::loopNestBuilder(
+mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
Value lb, Value ub, Value step, ValueRange iterArgInitValues,
function_ref<scf::ValueVector(Value, ValueRange)> fun) {
// Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
@@ -61,7 +61,7 @@ mlir::scf::ValueVector mlir::edsc::loopNestBuilder(
});
}
-mlir::scf::ValueVector mlir::edsc::loopNestBuilder(
+mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
ValueRange lbs, ValueRange ubs, ValueRange steps,
ValueRange iterArgInitValues,
function_ref<scf::ValueVector(ValueRange, ValueRange)> fun) {
diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp
index bc8671b9ba85..fe2eb9ced469 100644
--- a/mlir/lib/Dialect/SCF/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/SCF.cpp
@@ -305,7 +305,7 @@ void ForOp::getNumRegionInvocations(ArrayRef<Attribute> operands,
step.getValue().getSExtValue());
}
-ValueVector mlir::scf::buildLoopNest(
+LoopNest mlir::scf::buildLoopNest(
OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs,
ValueRange steps, ValueRange iterArgs,
function_ref<ValueVector(OpBuilder &, Location, ValueRange, ValueRange)>
@@ -323,7 +323,7 @@ ValueVector mlir::scf::buildLoopNest(
assert(results.size() == iterArgs.size() &&
"loop nest body must return as many values as loop has iteration "
"arguments");
- return results;
+ return LoopNest();
}
// First, create the loop structure iteratively using the body-builder
@@ -372,11 +372,13 @@ ValueVector mlir::scf::buildLoopNest(
builder.setInsertionPointToEnd(loops.back().getBody());
builder.create<scf::YieldOp>(loc, results);
- // Return the results of the outermost loop.
- return ValueVector(loops.front().result_begin(), loops.front().result_end());
+ // Return the loops.
+ LoopNest res;
+ res.loops.assign(loops.begin(), loops.end());
+ return res;
}
-ValueVector mlir::scf::buildLoopNest(
+LoopNest mlir::scf::buildLoopNest(
OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs,
ValueRange steps,
function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilder) {
diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
index 6ff4be0169fb..2a6a7ba7b7e3 100644
--- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
@@ -172,3 +172,43 @@ func @gemm6(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]]
// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
+
+// -----
+
+// CHECK-LABEL: func @matmul_tensors(
+// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
+func @matmul_tensors(
+ %arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>)
+ -> tensor<?x?xf32> {
+// CHECK: %[[C8:.*]] = constant 8 : index
+// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
+// CHECK: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"}
+// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
+// CHECK: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"}
+// CHECK: %[[LBY:.*]] = muli %[[BIDY]], %[[C8]] : index
+// CHECK: %[[STEPY:.*]] = muli %[[NBLOCKSY]], %[[C8]] : index
+// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xf32>) {
+// CHECK: %[[LBX:.*]] = muli %[[BIDX]], %[[C8]] : index
+// CHECK: %[[STEPX:.*]] = muli %[[NBLOCKSX]], %[[C8]] : index
+// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
+// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?xf32>) {
+// CHECK: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CHECK-SAME: init(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xf32> into tensor<?x?xf32>
+// CHECK: scf.yield %[[TD]] : tensor<?x?xf32>
+// CHECK: scf.yield %[[TD2]] : tensor<?x?xf32>
+// CHECK: scf.yield %[[TD1]] : tensor<?x?xf32>
+ %0 = linalg.matmul {__internal_linalg_transform__ = "tensors_distribute1"}
+ ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
+ init(%arg2: tensor<?x?xf32>)
+ -> tensor<?x?xf32>
+
+// CHECK: return %[[TD0]] : tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+
diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp
index 1a866066523e..7677c175ec94 100644
--- a/mlir/test/EDSC/builder-api-test.cpp
+++ b/mlir/test/EDSC/builder-api-test.cpp
@@ -1223,7 +1223,7 @@ TEST_FUNC(builder_loop_for_yield) {
[&](Value iv, ValueRange args) {
Value sum = args[0] + args[1];
return scf::ValueVector{args[1], sum};
- });
+ }).getResults();
results[0] + results[1];
// clang-format off
diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
index 253d4adf903c..8857bbe09eef 100644
--- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
@@ -409,6 +409,22 @@ static void fillTileAndDistributePatterns(MLIRContext *context,
LinalgMarker(Identifier::get("distribute6", context),
Identifier::get("after_distribute6", context)));
}
+
+ {
+ LinalgLoopDistributionOptions cyclicNprocsEqNiters;
+ cyclicNprocsEqNiters.distributionMethod.resize(
+ 2, DistributionMethod::CyclicNumProcsEqNumIters);
+ cyclicNprocsEqNiters.procInfo =
+ getGpuProcIds<gpu::BlockIdOp, gpu::GridDimOp>;
+ patterns.insert<LinalgTilingPattern<MatmulOp>>(
+ context,
+ LinalgTilingOptions()
+ .setTileSizes({8, 8, 4})
+ .setLoopType(LinalgTilingLoopType::Loops)
+ .setDistributionOptions(cyclicNprocsEqNiters),
+ LinalgMarker(Identifier::get("tensors_distribute1", context),
+ Identifier::get("tensors_after_distribute1", context)));
+ }
}
static void