[Mlir-commits] [mlir] [MLIR][XeGPU] Add unroll patterns for XeGPU (1/N) (PR #137010)

Thu May 8 14:17:04 PDT 2025

================
@@ -0,0 +1,427 @@
+//===- XeGPUUnroll.cpp - patterns to do unrolling ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains patterns for unrolling XeGPU operations. It follows a
+// similar concept and design as vector unroll patterns, serving as a complement
+// to them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/XeGPU/Transforms/Passes.h"
+
+#include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
+#include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include <numeric>
+
+namespace mlir {
+namespace xegpu {
+#define GEN_PASS_DEF_XEGPUUNROLL
+#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
+} // namespace xegpu
+} // namespace mlir
+
+#define DEBUG_TYPE "xegpu-unroll"
+#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
+#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
+
+using namespace mlir;
+
+namespace {
+
+template <typename SourceOp>
+struct UnrollPattern : public OpRewritePattern<SourceOp> {
+  // Forwards `context` and `benefit` to the OpRewritePattern base and
+  // initializes the `options` member from the caller-supplied unroll options.
+  UnrollPattern(MLIRContext *context, const xegpu::UnrollOptions &options,
+                PatternBenefit benefit = 1)
+      : OpRewritePattern<SourceOp>(context, benefit), options(options) {
+  }
+
+protected:
+  /// Return the target shape for the given `op`. Return std::nullopt if the
+  /// op shouldn't be or cannot be unrolled.
+  std::optional<SmallVector<int64_t>> getTargetShape(Operation *op) const {
+    LDBG("");
+    LDBG("Get unroll shape for: " << *op);
+
+    // A filter callback is optional; when one is installed and it fails for
+    // `op`, the op is not unrolled.
+    const bool rejectedByFilter =
+        options.filterConstraint && failed(options.filterConstraint(op));
+    if (rejectedByFilter) {
+      LDBG("--no filter constraint -> BAIL");
+      return std::nullopt;
+    }
+
+    // The native-shape callback is mandatory; its answer is returned as is.
+    assert(options.nativeShape &&
+           "expects the native shape for native shape call back function.");
+    return options.nativeShape(op);
+  }
+
+  SmallVector<Type> getUnrolledTypes(ShapedType type,
+                                     ArrayRef<int64_t> tileShape) const {
+    // Pure delegation to `options.getUnrolledTypes` for `type` and `tileShape`.
+    SmallVector<Type> unrolledTypes = options.getUnrolledTypes(type, tileShape);
+    return unrolledTypes;
+  }
+
+  /// Emulate the unpack behavior using insert_strided_slice for VectorType
+  /// values and unrealized_conversion_cast for TensorDescType values.
+  Value unpack(ValueRange srcs, Type destTy, ArrayRef<int64_t> blockSize,
+               Location loc, PatternRewriter &rewriter) const {
+    // Tensor descriptors: all of `srcs` are fed into one
+    // unrealized_conversion_cast producing `destTy`. The cast carries a unit
+    // attribute named `unpackAttrName` and `blockSize` as a dense i64 array
+    // attribute named `blockAttrName`.
+    if (isa<xegpu::TensorDescType>(destTy)) {
+      auto unpackMarker = NamedAttribute(
+          rewriter.getStringAttr(unpackAttrName), rewriter.getUnitAttr());
+      auto blockMarker =
+          NamedAttribute(rewriter.getStringAttr(blockAttrName),
+                         rewriter.getDenseI64ArrayAttr(blockSize));
+      auto cast = rewriter.create<UnrealizedConversionCastOp>(
+          loc, destTy, srcs,
+          ArrayRef<NamedAttribute>({unpackMarker, blockMarker}));
+      return cast.getResult(0);
+    }
+
+    // Vectors: start from a constant built from the element type's zero
+    // attribute, then insert each source at its tile offset with unit strides.
+    if (auto vectorTy = dyn_cast<VectorType>(destTy)) {
+      assert(vectorTy.getRank() == static_cast<int64_t>(blockSize.size()) &&
+             "Expecting blockSize size to match the rank of destTy.");
+      auto zero = rewriter.getZeroAttr(vectorTy.getElementType());
+      Value assembled = rewriter.create<arith::ConstantOp>(
+          loc, vectorTy, DenseElementsAttr::get(vectorTy, zero));
+
+      // zip_equal pairs every source with exactly one tile offset.
+      for (auto [piece, offsets] : llvm::zip_equal(
+               srcs, StaticTileOffsetRange(vectorTy.getShape(), blockSize))) {
+        SmallVector<int64_t> unitStrides(offsets.size(), 1);
+        assembled = rewriter.create<vector::InsertStridedSliceOp>(
+            loc, piece, assembled, offsets, unitStrides);
+      }
+      return assembled;
+    }
+
+    llvm_unreachable("Unexpected destTy.");
+    return Value();
+  }
+
+  /// Emulate the pack behavior using extract_strided_slice for VectorType
+  /// values and unrealized_conversion_cast for TensorDescType values.
+  SmallVector<Value> pack(Value src, TypeRange destTypes,
+                          ArrayRef<int64_t> blockSize, Location loc,
+                          PatternRewriter &rewriter) const {
+    Type srcTy = src.getType();
+
+    // Tensor descriptors: one unrealized_conversion_cast from `src` to
+    // `destTypes`. The cast carries a unit attribute named `packAttrName` and
+    // `blockSize` as a dense i64 array attribute named `blockAttrName`.
+    if (isa<xegpu::TensorDescType>(srcTy)) {
+      auto packMarker = NamedAttribute(rewriter.getStringAttr(packAttrName),
+                                       rewriter.getUnitAttr());
+      auto blockMarker =
+          NamedAttribute(rewriter.getStringAttr(blockAttrName),
+                         rewriter.getDenseI64ArrayAttr(blockSize));
+      auto cast = rewriter.create<UnrealizedConversionCastOp>(
+          loc, destTypes, src,
+          ArrayRef<NamedAttribute>({packMarker, blockMarker}));
+      return cast.getResults();
+    }
+
+    // Vectors: extract one `blockSize`-shaped slice with unit strides at every
+    // tile offset of the source shape, in the order the offsets are produced.
+    if (auto vectorTy = dyn_cast<VectorType>(srcTy)) {
+      assert(vectorTy.getRank() == static_cast<int64_t>(blockSize.size()) &&
+             "Expecting blockSize size to match the rank of src.");
+      SmallVector<Value> tiles;
+      for (SmallVector<int64_t> offsets :
+           StaticTileOffsetRange(vectorTy.getShape(), blockSize)) {
+        SmallVector<int64_t> unitStrides(offsets.size(), 1);
+        tiles.push_back(rewriter.create<vector::ExtractStridedSliceOp>(
+            loc, src, offsets, blockSize, unitStrides));
+      }
+      return tiles;
+    }
+
+    llvm_unreachable("Unexpected src type.");
+    return SmallVector<Value>();
+  }
+
+private:
+  const char *const packAttrName = "__xetile_blocking_pack__";
----------------
Garra1980 wrote:

I guess this should be `xegpu` rather than `xetile`, both here and in the tests.

https://github.com/llvm/llvm-project/pull/137010