[Mlir-commits] [mlir] [mlir][xegpu] SIMT distribution patterns for XeGPU CreateNdTdesc, LoadNd, StoreNd and Dpas Ops. (PR #135271)
Charitha Saumya
llvmlistbot at llvm.org
Tue Apr 29 11:33:26 PDT 2025
================
@@ -0,0 +1,83 @@
+//===---- XeGPUUtils.cpp - MLIR Utilities for XeGPUOps ------------------===//
+//
+// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utility methods for working with the XeGPU dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
+#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
+#include <cstdint>
+#include <functional>
+#include <numeric>
+
+using namespace mlir;
+
+FailureOr<VectorType>
+mlir::xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) {
+ auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
+  // This only works for subgroup-level layouts, which carry only lane_layout
+  // and lane_data, and describe how SIMD code is distributed into SIMT code.
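+  // e.g. a typical subgroup-level layout attribute looks like:
+  //   #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>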
+ if (!layout || !layout.isSgLayout())
+ return failure();
+
+ SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
+ SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
+ auto tdescShape = tdescTy.getShape();
+ auto elementType = tdescTy.getElementType();
+
+  // Compute sgSize by multiplying the elements of laneLayout,
+  // e.g. for a 2D layout, sgSize = laneLayout[0] * laneLayout[1];
+  // for a 1D layout, sgSize = laneLayout[0].
+  auto sgSize = std::accumulate(laneLayout.begin(), laneLayout.end(),
+                                int64_t{1}, std::multiplies<int64_t>());
+
+ // Case 1: regular loads/stores
+ auto scatterAttr = tdescTy.getEncodingAsScatterTensorDescAttr();
+ if (scatterAttr) {
+ auto chunkSize = scatterAttr.getChunkSize().getInt();
+    // Check that the first dimension of the tensor descriptor shape is
+    // distributable across the lane layout.
+ assert(tdescShape[0] == laneLayout[0] &&
+ "tensor descriptor shape is not distributable");
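+    // Each lane then owns one chunk of elements, e.g. a scattered tdesc of
+    // shape [16, 8] with lane_layout [16, 1] and chunk_size 8 distributes
+    // to vector<8xelemTy> per lane.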
+ return VectorType::get({chunkSize}, elementType);
+ }
+
+ // Case 2: block loads/stores
+ // Check if the tensor descriptor shape is distributable.
+ int64_t tensorSize = 1;
+ for (auto [tdescDim, laneDim, laneDataDim] :
+ llvm::zip_equal(tdescShape, laneLayout, laneData)) {
+ assert((tdescDim % (laneDim * laneDataDim) == 0) &&
+ "tensor descriptor shape is not distributable");
+ tensorSize *= tdescDim;
+ }
+ // tensorSize must be adjusted for array_length.
+ tensorSize *= tdescTy.getArrayLength();
+
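+  // e.g. for tdescShape [8, 16] with lane_layout [1, 16], lane_data [1, 1]
+  // and array_length 2: tensorSize = 8 * 16 * 2 = 256, sgSize = 16, and the
+  // distributed type is vector<16xelemTy>.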
+ return VectorType::get({tensorSize / sgSize}, elementType);
+}
+
+FailureOr<VectorType>
+mlir::xegpu::getDistributedVectorType(VectorType originalType,
+ xegpu::LayoutAttr layout) {
+ int64_t rank = originalType.getRank();
+  // Distributed vector type is only supported for 1D, 2D and 3D vectors.
+ if (rank < 1 || rank > 3)
+ return failure();
+ ArrayRef<int64_t> shape = originalType.getShape();
+  // arrayLength is 1 for 1D and 2D vectors, and equal to the first dimension
+  // of the 3D vector.
+ int arrayLength = 1;
+ if (rank == 3)
+ arrayLength = shape[0];
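+  // e.g. for a 3D vector<2x8x16xf32>, arrayLength becomes 2.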
+ auto helperTdescTy = xegpu::TensorDescType::get(
+ shape, originalType.getElementType(), arrayLength,
----------------
charithaintc wrote:
Yes, you are right. Fixed it.
https://github.com/llvm/llvm-project/pull/135271