[Mlir-commits] [mlir] [mlir][xegpu] SIMT distribution patterns for XeGPU CreateNdTdesc, LoadNd, StoreNd and Dpas Ops. (PR #135271)

Tue Apr 29 11:50:49 PDT 2025

================
@@ -628,11 +673,736 @@ void RunSGMapPropagation::printAnalysisResult(llvm::raw_ostream &os) {
     }
   }
   /// Print the analysis result for each function.
-  for (auto funcOp : funcOps) {
+  for (FunctionOpInterface funcOp : funcOps) {
     printFunctionResult(funcOp);
   }
 }
 
+namespace {
+
+///===----------------------------------------------------------------------===///
+/// LayoutAttrAssignment
+///===----------------------------------------------------------------------===///
+
+/// Attaches layout attributes to operations and to the users of their results,
+/// driven by the result of the layout propagation analysis.
+class LayoutAttrAssignment {
+public:
+  /// `top` is the operation that `run()` walks; `getLayout` is the callable
+  /// queried for the layout of each value.
+  LayoutAttrAssignment(Operation *top,
+                       function_ref<LayoutInfo(Value)> getLayout)
+      : getAnalysisResult(getLayout), top(top) {}
+
+  /// Entry point: assign layouts to the IR under `top`, then resolve conflicts.
+  LogicalResult run();
+
+private:
+  /// Attach the analysis layouts to `op` (and, where applicable, its users).
+  LogicalResult assign(Operation *op);
+  /// Convert the layout assigned to `v` into an xegpu::LayoutAttr.
+  xegpu::LayoutAttr getLayoutAttrForValue(Value v);
+  /// Record `layout` on every operation that uses `v`.
+  void assignToUsers(Value v, xegpu::LayoutAttr layout);
+  /// Hook for layout conflict resolution.
+  LogicalResult resolveConflicts();
+
+  /// Lookup into the layout propagation analysis: value -> layout.
+  /// NOTE(review): function_ref is presumably non-owning, so the callable
+  /// passed to the constructor must outlive this object -- confirm at callers.
+  function_ref<LayoutInfo(Value)> getAnalysisResult;
+  /// Operation passed to the constructor; `run()` walks it.
+  Operation *top;
+};
+
+} // namespace
+
+/// Tag every operation that uses `v` with `layout`. The attribute is stored on
+/// the using operation under a name derived from the operand position at which
+/// `v` appears.
+void LayoutAttrAssignment::assignToUsers(Value v, xegpu::LayoutAttr layout) {
+  for (OpOperand &use : v.getUses()) {
+    /// The name is the shared operand prefix followed by the operand index, so
+    /// the layout attribute can be queried generically later.
+    std::string name =
+        operandLayoutNamePrefix + std::to_string(use.getOperandNumber());
+    use.getOwner()->setAttr(name, layout);
+  }
+}
+
+/// Build the xegpu::LayoutAttr for the layout that the analysis assigned to
+/// `v`. Returns a default-constructed (empty) attribute when the analysis has
+/// no layout assigned for `v`.
+xegpu::LayoutAttr LayoutAttrAssignment::getLayoutAttrForValue(Value v) {
+  LayoutInfo info = getAnalysisResult(v);
+  if (!info.isAssigned())
+    return {};
+  SmallVector<int, 2> laneLayout;
+  SmallVector<int, 2> laneData;
+  /// Walk the layout and data arrays in lock-step, narrowing each entry to int.
+  for (auto [layoutDim, dataDim] : llvm::zip_equal(
+           info.getLayoutAsArrayRef(), info.getDataAsArrayRef())) {
+    laneLayout.push_back(static_cast<int>(layoutDim));
+    laneData.push_back(static_cast<int>(dataDim));
+  }
+  return xegpu::LayoutAttr::get(v.getContext(), laneLayout, laneData);
+}
+
+/// Attach the layout computed by the layout propagation analysis to `op` and
+/// to the users of its values:
+///  - function ops: each argument's layout is recorded on the argument's users;
+///  - ops with no results, or only scalar results: nothing is done;
+///  - ops whose first result is a tensor descriptor: the op is replaced by a
+///    clone whose result type carries the layout (fails if no layout is known);
+///  - all other ops: each result's layout is recorded on the op and its users.
+LogicalResult LayoutAttrAssignment::assign(Operation *op) {
+  /// Function ops: forward every argument layout to the argument's users.
+  if (auto funcOp = dyn_cast<FunctionOpInterface>(op)) {
+    for (BlockArgument argument : funcOp.getArguments())
+      if (xegpu::LayoutAttr argLayout = getLayoutAttrForValue(argument))
+        assignToUsers(argument, argLayout);
+    return success();
+  }
+  /// Nothing to annotate when there are no results or all results are scalars.
+  auto isScalar = [](Type t) { return t.isIntOrIndexOrFloat(); };
+  if (op->getNumResults() == 0 ||
+      llvm::all_of(op->getResultTypes(), isScalar))
+    return success();
+  /// Tensor descriptor results carry the layout in the descriptor type itself.
+  Value firstResult = op->getResult(0);
+  if (auto descTy = dyn_cast<xegpu::TensorDescType>(firstResult.getType())) {
+    xegpu::LayoutAttr descLayout = getLayoutAttrForValue(firstResult);
+    if (!descLayout) {
+      LLVM_DEBUG(DBGS() << "No layout for result of " << *op << "\n");
+      return failure();
+    }
+
+    /// Same descriptor type as before, with the layout added.
+    auto layoutedDescTy = xegpu::TensorDescType::get(
+        descTy.getContext(), descTy.getShape(), descTy.getElementType(),
+        descTy.getEncoding(), descLayout);
+    /// Clone the op in front of the original, retype the clone's result,
+    /// redirect all uses to the clone, and drop the original op.
+    OpBuilder builder(op);
+    Operation *replacement = builder.clone(*op);
+    replacement->getResult(0).setType(layoutedDescTy);
+    op->replaceAllUsesWith(replacement->getResults());
+    op->erase();
+    return success();
+  }
+  /// Otherwise record each result's layout on the op and on the result's users.
+  for (auto [index, result] : llvm::enumerate(op->getResults())) {
+    xegpu::LayoutAttr resultLayout = getLayoutAttrForValue(result);
+    if (!resultLayout)
+      continue;
+    std::string name = resultLayoutNamePrefix + std::to_string(index);
+    op->setAttr(name, resultLayout);
+    assignToUsers(result, resultLayout);
+  }
+  return success();
+}
+
+/// Walk the IR under `top`, attaching xegpu::LayoutAttr to ops and their users.
+/// The walk is interrupted at the first op for which `assign` fails, in which
+/// case failure is returned; otherwise the result of `resolveConflicts()` is
+/// returned.
+LogicalResult LayoutAttrAssignment::run() {
+  WalkResult status = top->walk([this](Operation *nested) -> WalkResult {
+    return failed(assign(nested)) ? WalkResult::interrupt()
+                                  : WalkResult::advance();
+  });
+
+  return status.wasInterrupted() ? failure() : resolveConflicts();
+}
+
+/// TODO: Implement the layout conflict resolution. This must mainly ensure two
+/// things:
+/// 1) Is a given layout supported by the op? (This needs a query of the target
+///    HW info.) If not, can the layout be achieved using a layout conversion?
+/// 2) Do all the operands have the required layout? If not, can the mismatch
+///    be resolved using a layout conversion?
+/// Until this is implemented, the function unconditionally reports success.
+LogicalResult LayoutAttrAssignment::resolveConflicts() {
+  return success();
+}
+
+namespace {
+
+///===----------------------------------------------------------------------===///
+/// SIMT Distribution Patterns
+///===----------------------------------------------------------------------===///
+
+/// Helper function to get the distributed vector type for a source vector type
+/// according to the lane_layout. We simply divide each dimension of tensor
+/// descriptor shape by corresponding lane_layout dimension. If array_length >
+/// 1, that is appended to the front of the distributed shape.
+/// NOTE: This is the vector type that will be returned by the
+/// gpu.warp_execute_on_lane0 op.
----------------
charithaintc wrote:

fixed. 

https://github.com/llvm/llvm-project/pull/135271