[Mlir-commits] [mlir] [mlir][xegpu] SIMT distribution patterns for XeGPU CreateNdTdesc, LoadNd, StoreNd and Dpas Ops. (PR #135271)
Chao Chen
llvmlistbot at llvm.org
Thu Apr 24 08:57:00 PDT 2025
================
@@ -633,6 +675,743 @@ void RunSGMapPropagation::printAnalysisResult(llvm::raw_ostream &os) {
}
}
+namespace {
+
+///===----------------------------------------------------------------------===///
+/// LayoutAttrAssignment
+///===----------------------------------------------------------------------===///
+
+/// Attaches xegpu::LayoutAttr attributes to operations, and to the users of
+/// their results, using the outcome of the layout propagation analysis.
+class LayoutAttrAssignment {
+public:
+  /// `top` is the operation whose nested IR is visited by run(); `getLayout`
+  /// is queried for the layout the analysis computed for a given value.
+  LayoutAttrAssignment(Operation *top,
+                       function_ref<LayoutInfo(Value)> getLayout)
+      : getAnalysisResult(getLayout), top(top) {}
+
+  /// Walks the IR under `top`, assigning layouts op by op, and then runs
+  /// conflict resolution. Fails as soon as any single assignment fails.
+  LogicalResult run();
+
+private:
+  /// Callable returning the layout of a value according to the layout
+  /// propagation analysis.
+  /// NOTE(review): function_ref does not own the callable it refers to;
+  /// presumably every caller keeps the callable alive for as long as this
+  /// object is used - confirm at the construction sites.
+  function_ref<LayoutInfo(Value)> getAnalysisResult;
+  /// Root operation of the walk performed by run().
+  Operation *top;
+
+  /// Converts the analysis result for `v` into an xegpu::LayoutAttr; returns
+  /// a null attribute when no layout is assigned to `v`.
+  xegpu::LayoutAttr getLayoutAttrForValue(Value v);
+  /// Sets `layout` as an operand-layout attribute on every user of `v`.
+  void assignToUsers(Value v, xegpu::LayoutAttr layout);
+  /// Assigns layouts for a single operation.
+  LogicalResult assign(Operation *op);
+  /// Post-pass hook for resolving layout conflicts.
+  LogicalResult resolveConflicts();
+};
+
+} // namespace
+
+/// Records `layout` on every operation that uses `v`. One attribute is set per
+/// use; its name is the operand-layout prefix followed by the operand index,
+/// which keeps later lookups generic.
+void LayoutAttrAssignment::assignToUsers(Value v, xegpu::LayoutAttr layout) {
+  for (OpOperand &use : v.getUses()) {
+    const std::string attrName =
+        operandLayoutNamePrefix + std::to_string(use.getOperandNumber());
+    use.getOwner()->setAttr(attrName, layout);
+  }
+}
+
+/// Build the xegpu::LayoutAttr that corresponds to the analysis result for
+/// `v`. Returns a null attribute when the analysis has no layout assigned to
+/// the value.
+xegpu::LayoutAttr LayoutAttrAssignment::getLayoutAttrForValue(Value v) {
+  LayoutInfo info = getAnalysisResult(v);
+  if (!info.isAssigned())
+    return {};
+
+  /// The attribute is built from `int` vectors, so narrow each entry of the
+  /// analysis result while copying it over.
+  SmallVector<int, 2> laneLayout, laneData;
+  for (auto [lanes, elems] : llvm::zip_equal(info.getLayoutAsArrayRef(),
+                                             info.getDataAsArrayRef())) {
+    laneLayout.push_back(static_cast<int>(lanes));
+    laneData.push_back(static_cast<int>(elems));
+  }
+  return xegpu::LayoutAttr::get(v.getContext(), laneLayout, laneData);
+}
+
+/// Attach the layouts computed by the propagation analysis to `op`.
+///  * Function-like ops: each argument's layout is forwarded to the users of
+///    that argument.
+///  * Ops without results, or with only scalar results: nothing to do.
+///  * Ops whose first result is a tensor descriptor: the layout is embedded in
+///    the descriptor type of a cloned op that replaces `op`; a missing layout
+///    is reported as failure.
+///  * Everything else: the layout is set as a result-layout attribute on `op`
+///    and forwarded to the users of each result.
+LogicalResult LayoutAttrAssignment::assign(Operation *op) {
+  if (auto funcOp = dyn_cast<FunctionOpInterface>(op)) {
+    for (auto argument : funcOp.getArguments()) {
+      if (xegpu::LayoutAttr argLayout = getLayoutAttrForValue(argument))
+        assignToUsers(argument, argLayout);
+    }
+    return success();
+  }
+
+  /// Ops with no results, or with scalar-only results, carry no layout.
+  const bool onlyScalarResults =
+      llvm::all_of(op->getResultTypes(),
+                   [](Type type) { return type.isIntOrIndexOrFloat(); });
+  if (op->getNumResults() == 0 || onlyScalarResults)
+    return success();
+
+  /// Tensor descriptors carry the layout in their own type.
+  if (auto descType =
+          dyn_cast<xegpu::TensorDescType>(op->getResult(0).getType())) {
+    xegpu::LayoutAttr descLayout = getLayoutAttrForValue(op->getResult(0));
+    if (!descLayout) {
+      LLVM_DEBUG(DBGS() << "No layout for result of " << *op << "\n");
+      return failure();
+    }
+
+    /// Clone right before `op`, retype the clone's first result with the
+    /// layout-carrying descriptor type, redirect all uses, then drop `op`.
+    OpBuilder builder(op);
+    Operation *clonedOp = builder.clone(*op);
+    auto descTypeWithLayout = xegpu::TensorDescType::get(
+        descType.getContext(), descType.getShape(), descType.getElementType(),
+        descType.getEncoding(), descLayout);
+    clonedOp->getResult(0).setType(descTypeWithLayout);
+    op->replaceAllUsesWith(clonedOp->getResults());
+    op->erase();
+    return success();
+  }
+
+  /// General case: annotate the op per result and forward to the users.
+  for (unsigned idx = 0, numResults = op->getNumResults(); idx < numResults;
+       ++idx) {
+    auto result = op->getResult(idx);
+    xegpu::LayoutAttr resultLayout = getLayoutAttrForValue(result);
+    if (!resultLayout)
+      continue;
+    op->setAttr(resultLayoutNamePrefix + std::to_string(idx), resultLayout);
+    assignToUsers(result, resultLayout);
+  }
+  return success();
+}
+
+/// Visit every operation nested under `top` and attach layouts to it via
+/// assign(). The walk stops at the first failing assignment, in which case
+/// failure is returned; otherwise the result of conflict resolution is
+/// returned.
+LogicalResult LayoutAttrAssignment::run() {
+  WalkResult status = top->walk([&](Operation *op) -> WalkResult {
+    return failed(assign(op)) ? WalkResult::interrupt()
+                              : WalkResult::advance();
+  });
+  if (status.wasInterrupted())
+    return failure();
+  return resolveConflicts();
+}
+
+/// Placeholder that currently always succeeds.
+/// TODO: Implement the layout conflict resolution. It mainly has to answer
+/// two questions:
+/// 1) Is a given layout supported by the op (this needs a query of the target
+///    HW info)? If not, can the layout be achieved using a layout conversion?
+/// 2) Do all the operands have the required layout? If not, can the mismatch
+///    be resolved using a layout conversion?
+LogicalResult LayoutAttrAssignment::resolveConflicts() {
+  return success();
+}
+
+namespace {
+
+///===----------------------------------------------------------------------===///
+/// SIMT Distribution Patterns
+///===----------------------------------------------------------------------===///
+
+/// Compute the distributed vector type for `originalType` under the
+/// lane_layout of `layout`. The trailing `lane_layout.size()` dimensions of
+/// the vector shape are each divided by the matching lane_layout entry; any
+/// leading dimensions are kept unchanged. Fails when `layout` is null or when
+/// a distributed dimension is not evenly divisible by its lane_layout entry.
+/// NOTE: This is the vector type that will be returned by the
+/// gpu.warp_execute_on_lane0 op.
+///
+/// Examples:
+/// | original vector shape | lane_layout | distributed vector shape |
+/// |-----------------------|-------------|--------------------------|
+/// | 32x16                 | [1, 16]     | 32x1                     |
+/// | 32x16                 | [2, 8]      | 16x2                     |
+/// | 2x32x16               | [1, 16]     | 2x32x1                   |
+FailureOr<VectorType> getDistVecTypeBasedOnLaneLayout(xegpu::LayoutAttr layout,
+                                                      VectorType originalType) {
+  if (!layout)
+    return failure();
+
+  auto originalShape = originalType.getShape();
+  auto laneLayout = layout.getLaneLayout().asArrayRef();
+  assert(originalShape.size() >= laneLayout.size() &&
+         "Rank of the original vector type should be greater or equal to the "
+         "size of the lane layout to distribute the vector type.");
+
+  /// Start from a copy of the original shape and rewrite only the trailing
+  /// dimensions that are covered by the lane layout.
+  SmallVector<int64_t> distributedShape(originalShape);
+  unsigned firstDistributedDim = originalType.getRank() - laneLayout.size();
+  for (unsigned dimIdx = firstDistributedDim, rank = originalShape.size();
+       dimIdx < rank; ++dimIdx) {
+    auto lanes = laneLayout[dimIdx - firstDistributedDim];
+    /// Each distributed dimension must split evenly across its lanes.
+    if (originalShape[dimIdx] % lanes != 0)
+      return failure();
+    distributedShape[dimIdx] = originalShape[dimIdx] / lanes;
+  }
+  return VectorType::get(distributedShape, originalType.getElementType());
+}
+
+/// Get the distributed vector type for a source vector type according to a
+/// xegpu::LayoutAttr.
+static VectorType getDistributedVectorType(xegpu::LayoutAttr layout,
+ VectorType originalType) {
+ auto shape = originalType.getShape();
+ auto distVecTyOrFailure =
+ xegpu::TensorDescType::get(shape, originalType.getElementType(),
----------------
chencha3 wrote:
Sounds good to me. How do you feel about making them static methods of XeGPUDialect? I ran into a similar issue and did so in https://github.com/llvm/llvm-project/pull/137010. Alternatively, we can create a Utils file.
https://github.com/llvm/llvm-project/pull/135271
More information about the Mlir-commits
mailing list