[mlir] [MLIR][Linalg] Add aggregate ops decomposition pass and softmax decom… (PR #97582)

Mon Jul 8 02:36:50 PDT 2024

================
@@ -2564,116 +2564,42 @@ void SoftmaxOp::getEffects(
 
 // Helper functions for softmax decomposition.
 // @{
-
-// Helper function to produce the iterator types (reduction or parallel) and
-// affine maps for the iterators used in the decomposition of softmax.
-// This method creates:
-// If allParallel == true:
-// - iterator type: {parallel, ..., parallel}
-// - affine maps:
-// -- identity with inputRank dimensions.
-// -- (d0, ..., dN) -> (d0, ..., d_dim-1, d_dim+1, ..., dN),
-//    where N == inputRank.
-//
-// If allParallel == false:
-// - iterator type at dim(i) == parallel for i != \p dim and
-//   dim(dim) == reduction.
-// - affine map:
-// -- identity with inputRank dimensions.
-// -- (d0, ..., dN) -> (d0, ..., d_dim-1, d_dim+1, ..., dN),
-//    where N == inputRank.
-static std::tuple<SmallVector<utils::IteratorType>, SmallVector<AffineMap>>
-computeIteratorTypesAndIndexingMaps(OpBuilder &builder, int64_t inputRank,
-                                    int64_t dim, bool allParallel = false) {
-  SmallVector<utils::IteratorType> iteratorTypes(inputRank,
-                                                 utils::IteratorType::parallel);
-  if (!allParallel)
-    iteratorTypes[dim] = utils::IteratorType::reduction;
-  MLIRContext *ctxt = builder.getContext();
-  auto identityMap = AffineMap::getMultiDimIdentityMap(inputRank, ctxt);
-  SmallVector<AffineExpr, 2> affineExprs;
-  for (int i = 0; i < inputRank; i++) {
-    if (i != dim)
-      affineExprs.push_back(mlir::getAffineDimExpr(i, ctxt));
-  }
-  auto reductionMap =
-      AffineMap::get(inputRank, /*symbols=*/0, affineExprs, ctxt);
-  SmallVector<AffineMap> indexingMaps{identityMap, reductionMap};
-  return std::make_tuple(iteratorTypes, indexingMaps);
-}
-
-// Helper function to produce a linalg.generic that computes a reduction on
-// dimension \p dim with the operation type \p T.
-template <typename T>
-static Value reduce(OpBuilder &builder, Location loc, Value input, Value output,
-                    int64_t dim) {
-  auto inputType = cast<ShapedType>(input.getType());
-  ArrayRef<int64_t> inputShape = inputType.getShape();
-  int64_t inputRank = inputShape.size();
-  auto [iteratorTypes, indexingMaps] =
-      computeIteratorTypesAndIndexingMaps(builder, inputRank, dim);
-  assert(indexingMaps.size() == 2 &&
-         "We should have two maps: 1 for the input, 1 for the output");
-  assert(indexingMaps[0].isIdentity() && "input map should be identity");
-
-  auto genericOp = builder.create<linalg::GenericOp>(
-      loc, output.getType(), input, output, indexingMaps, iteratorTypes,
-      [&](OpBuilder &b, Location loc, ValueRange args) {
-        Value result = b.create<T>(loc, args[0], args[1]);
-        b.create<linalg::YieldOp>(loc, result);
-      });
-  return genericOp.getResult(0);
-}
-
-/// Produce a linalg generic that computes the second step of the softmax
-/// decomposition: res = exp(input - max), where \p max is the max of \p input
-/// on dimension \p dim.
-static Value buildSubAndExpOp(OpBuilder &builder, Location loc, Value input,
-                              Value max, Value output, int64_t dim) {
-  auto inputType = cast<ShapedType>(input.getType());
-  ArrayRef<int64_t> inputShape = inputType.getShape();
-  int64_t inputRank = inputShape.size();
-  auto [iteratorTypes, indexingMaps] = computeIteratorTypesAndIndexingMaps(
-      builder, inputRank, dim, /*allParallel=*/true);
-  assert(indexingMaps.size() == 2 && "We should have one map for each input");
-  assert(indexingMaps[0].isIdentity() && "input map should be identity");
-  // Add the affine map for the output argument.
-  indexingMaps.push_back(indexingMaps[0]);
-  auto genericOp = builder.create<linalg::GenericOp>(
-      loc, input.getType(), ValueRange{input, max}, output, indexingMaps,
-      iteratorTypes, [&](OpBuilder &b, Location loc, ValueRange args) {
-        Value diff = b.create<arith::SubFOp>(loc, args[0], args[1]);
-        Value result = b.create<math::ExpOp>(loc, diff);
-        b.create<linalg::YieldOp>(loc, result);
-      });
-  return genericOp.getResult(0);
-}
-
-/// Produce a linalg generic that computes the final step of the softmax
-/// decomposition.
-/// \returns  linalg.generic ins(\p numerator, \p denominator) outs(\p output) {
-///   yield  n / d
-/// }
-static Value buildDivOp(OpBuilder &builder, Location loc, Value numerator,
-                        Value denominator, Value output, int64_t dim) {
-  auto inputType = cast<ShapedType>(numerator.getType());
-  ArrayRef<int64_t> inputShape = inputType.getShape();
-  int64_t inputRank = inputShape.size();
-  auto [iteratorTypes, indexingMaps] = computeIteratorTypesAndIndexingMaps(
-      builder, inputRank, dim, /*allParallel=*/true);
-  assert(indexingMaps.size() == 2 &&
-         "We should have one map for each input (2)");
-  assert(indexingMaps[0].isIdentity() && "Numerator map should be identity");
-  // Add the affine map for the output tensor.
-  indexingMaps.push_back(indexingMaps[0]);
-  auto genericOp = builder.create<linalg::GenericOp>(
-      loc, numerator.getType(), ValueRange{numerator, denominator}, output,
-      indexingMaps, iteratorTypes,
-      [&](OpBuilder &b, Location loc, ValueRange args) {
-        Value result = b.create<arith::DivFOp>(loc, args[0], args[1]);
-        b.create<linalg::YieldOp>(loc, result);
-      });
-  return genericOp.getResult(0);
+TypedAttr createInitValueForReduceMaxOp(Type type, OpBuilder &b) {
----------------
banach-space wrote:

> createInitValueForReduceMaxOp

This might be something obvious, but where does reduction happen?

https://github.com/llvm/llvm-project/pull/97582