[Mlir-commits] [mlir] [MLIR][Linalg] Add pass to convert linalg.generic back to named ops (PR #95656)
Andrzej Warzyński
llvmlistbot at llvm.org
Thu Jun 27 04:18:27 PDT 2024
================
@@ -58,6 +68,195 @@ static bool areBinOpsSwapped(GenericOp genericOp) {
return swapped;
}
+//===----------------------------------------------------------------------===//
+// Specialize linalg generic to matmul variants.
+//===----------------------------------------------------------------------===//
+/// Identifies a linalg.generic that is essentially a named op of the form:
+// ` linalg.{batch_}?matmul{_transpose_a | _transpose_b}? `
+//
+// It is possible that a linalg.generic implements a matmul, but not in a
+// straightforward way, e.g. below is a matrix multiply over some slice
+// ```
+// %0 = linalg.generic {
+// indexing_maps = [affine_map<(d0, d1, d2) -> (3, d1, d0)>,
+// affine_map<(d0, d1, d2) -> (d0, 5, d2)>,
+// affine_map<(d0, d1, d2) -> (d2, d1, 13)>],
+// iterator_types = ["parallel", "parallel", "parallel"]}
+// ins(%A, %B : tensor<20x20x20xf32>, tensor<20x20x20xf32>)
+// outs(%C : tensor<20x20x20xf32>) {
+// ^bb0(%a: f32, %b: f32, %c : f32):
+// %mul = arith.mulf %a, %b : f32
+// %add = arith.addf %mul, %c : f32
+// linalg.yield %add : f32
+// } -> tensor<20x20x20xf32>
+// ```
+// It is not possible to represent the above as a named op,
+// e.g. linalg.batch_matmul(%A, %B : tensor<20x20x20xf32>, ...) is
+// not the same as the linalg.generic above.
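+//
+// By contrast, a generic whose maps are plain projected permutations over
+// the iteration space does match. For illustration (a sketch, not taken
+// from this patch), the following is exactly `linalg.matmul`:
+// ```
+//  %0 = linalg.generic {
+//          indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
+//                           affine_map<(d0, d1, d2) -> (d2, d1)>,
+//                           affine_map<(d0, d1, d2) -> (d0, d1)>],
+//          iterator_types = ["parallel", "parallel", "reduction"]}
+//          ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+//          outs(%C : tensor<?x?xf32>) {
+//        ^bb0(%a: f32, %b: f32, %c : f32):
+//          %mul = arith.mulf %a, %b : f32
+//          %add = arith.addf %mul, %c : f32
+//          linalg.yield %add : f32
+//        } -> tensor<?x?xf32>
+// ```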
+namespace {
+enum class IndexMatchResult {
+ Match = 0, // identity map.
+ Transposed, // transposed map.
+ Mismatch // none of the above.
+};
+
+// Consider the A matrix in `C[M,N] = A[M,K] * B[K,N]`. Below, we
+// check whether the index map of A is identity (match), transposed, or
+// something completely different (mismatch).
+// The naming and explanation are in terms of A, but the function effectively
+// checks the maps of all of A, B and C, i.e. <M,K>, <K,N> and <M,N>.
+static IndexMatchResult matchOperandMap(AffineMap map, unsigned batchSize,
+ unsigned expectedPosOfM,
+ unsigned expectedPosOfK) {
+ // Get the matrix multiply indices. They are past the batch indices.
+ auto exprOfM = map.getResults()[batchSize];
+ auto exprOfK = map.getResults()[batchSize + 1];
+
+ // They should be pure dim ids.
+ if (exprOfM.getKind() != AffineExprKind::DimId ||
+ exprOfK.getKind() != AffineExprKind::DimId)
+ return IndexMatchResult::Mismatch;
+
+ auto posM = cast<AffineDimExpr>(exprOfM).getPosition();
+ auto posK = cast<AffineDimExpr>(exprOfK).getPosition();
+
+ if (expectedPosOfM == posM && expectedPosOfK == posK)
+ return IndexMatchResult::Match;
+
+ if (expectedPosOfM == posK && expectedPosOfK == posM)
+ return IndexMatchResult::Transposed;
+
+ return IndexMatchResult::Mismatch;
+}
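+
+// For illustration (a hypothetical call, assuming the canonical iteration
+// space d0 = M, d1 = N, d2 = K and no batch dims): for the A operand one
+// would check matchOperandMap(map, /*batchSize=*/0, /*expectedPosOfM=*/0,
+// /*expectedPosOfK=*/2). A map (d0, d1, d2) -> (d0, d2) yields Match,
+// (d0, d1, d2) -> (d2, d0) yields Transposed, and e.g.
+// (d0, d1, d2) -> (d1, d2) yields Mismatch.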
+
+// All the variants `linalg.{batch_}?matmul{_transpose_a | _transpose_b}?`
+// take the same number of inputs and outputs.
+template <typename Variant>
+static LinalgOp replaceWithMatmulVariant(RewriterBase &rewriter, GenericOp op) {
+ LinalgOp namedOp = rewriter.replaceOpWithNewOp<Variant>(
+ op, ValueRange{op.getDpsInputs()[0], op.getDpsInputs()[1]},
+ ValueRange{op.getDpsInits()[0]});
+ return namedOp;
+}
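+
+// E.g. (illustrative) replaceWithMatmulVariant<MatmulTransposeAOp>(rewriter,
+// genericOp) rewrites the generic into linalg.matmul_transpose_a.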
+
+// Converts linalg.generic to named linalg.*matmul* where possible.
+static FailureOr<LinalgOp> specializeLinalgContractions(RewriterBase &rewriter,
+ GenericOp genericOp) {
+ if (genericOp.getNumDpsInputs() != 2 || genericOp.getNumDpsInits() != 1)
+ return failure();
+
+  // A linalg.generic contraction can be across multiple axes, but for the
+  // matmul variants there must be exactly one reduction dimension.
+ if (genericOp.getNumReductionLoops() != 1)
+ return failure();
+
+ // Must be projected permutations.
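+  // E.g. (illustrative) (d0, d1, d2) -> (d0, d2) is a projected permutation,
+  // whereas (d0, d1, d2) -> (3, d1, d0) from the example above is not.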
+ auto mapRange = genericOp.getIndexingMapsArray();
+ if (llvm::any_of(mapRange,
+ [](AffineMap m) { return !m.isProjectedPermutation(); }))
+ return failure();
+
+ // matmul contractions are of the form:
+ // %0 = <elemwise>(permutation-of(cu(block-argument-0),
+ // cu(block-argument-1)))
+ // %1 = <reduce>(permutation-of(cu(%0), cu(block-argument-2)))
+ //
+ // where <elemwise> and <reduce> are binary operations constituting a
+ // contraction (in the canonical case, <elemwise> is a multiplication and
+ // <reduce> is an addition). All operands of all operations may be supplied
+ // through a chain of side effect-free unary operations, such as casts,
+ // which is denoted as `cu` above.
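+  //
+  // For illustration (a sketch, not from this patch), a body such as
+  //   %e = arith.extf %a : f16 to f32
+  //   %mul = arith.mulf %e, %b : f32
+  //   %add = arith.addf %mul, %c : f32
+  // would still qualify: the `arith.extf` is a side effect-free unary cast
+  // (a `cu` chain feeding the <elemwise> operand).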
+ if (!mlir::linalg::detail::isContractionBody(
+ *genericOp.getBlock(), [](Operation *first, Operation *second) {
+ if ((isa<arith::MulFOp>(first) && isa<arith::AddFOp>(second)) ||
+ (isa<arith::MulIOp>(first) && isa<arith::AddIOp>(second)) ||
+ (isa<complex::MulOp>(first) && isa<complex::AddOp>(second)))
+ return true;
+ return false;
+ }))
+ return failure();
+
+ // Finds 2 parallel (m and n) and 1 reduction (k) dimension candidates that
+ // form a matmul subcomputation. These dimensions are such that:
+ // 1. The m dimension is involved in an outer-product along LHS
+ // (i.e. it is a permutation on RES and LHS and does not appear in RHS).
+ // 2. The n dimension is involved in an outer-product along RHS
+ // (i.e. it is a permutation on RES and RHS and does not appear in LHS).
+ // 3. The k dimension appears as a permutation on LHS and RHS.
+ // 4. m, n and k appear only once in any given indexing.
+ // 5. Optional batch dimensions that appear in all operands are captured.
+ auto res = inferContractionDims(genericOp);
+ assert(succeeded(res) && "unexpected failure to infer contraction dims");
+ auto dims = *res;
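+  // For illustration (assuming a plain matmul with maps (d0, d2), (d2, d1)
+  // and (d0, d1)): dims.m = {0}, dims.n = {1}, dims.k = {2} and dims.batch
+  // is empty; for linalg.batch_matmul every entry shifts by one past the
+  // leading batch dimension, i.e. dims.batch = {0}.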
+
+ // Other than `batch`, other dim sizes must be 1 for linalg.*_matmul_*.
----------------
banach-space wrote:
Now I understand where the confusion is coming from. For me, "dim sizes" in `A = M x K` are `M` and `K`. Whereas this is checking e.g. the number of "contraction dims corresponding to K as inferred by `inferContractionDims`"? So, `M` and `K` can indeed be "1". Could you clarify in the comment?
Also, are you able to add a negative test to exercise this check?
https://github.com/llvm/llvm-project/pull/95656
More information about the Mlir-commits mailing list