[Mlir-commits] [mlir] Revert "[mlir][xegpu] Add XeGPU subgroup map propagation analysis for XeGPU SIMT distribution." (PR #131364)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Mar 14 10:32:56 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Charitha Saumya (charithaintc)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#<!-- -->130240
---
Patch is 68.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131364.diff
4 Files Affected:
- (modified) mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td (-15)
- (modified) mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt (-1)
- (removed) mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp (-662)
- (removed) mlir/test/Dialect/XeGPU/subgroup-map-propagation.mlir (-563)
``````````diff
diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
index 3e81f2d0ed786..1ecd6ce95322b 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
@@ -23,19 +23,4 @@ def XeGPUFoldAliasOps : Pass<"xegpu-fold-alias-ops"> {
];
}
-def XeGPUSubgroupDistribute : Pass<"xegpu-subgroup-distribute"> {
- let summary = "Distribute XeGPU ops to work items";
- let description = [{
- The pass distributes subgroup level (SIMD) XeGPU ops to work items.
- }];
- let dependentDialects = [
- "memref::MemRefDialect", "xegpu::XeGPUDialect", "vector::VectorDialect"
- ];
- let options = [
- Option<"printOnly", "print-analysis-only", "bool",
- /*default=*/"false",
- "Print the result of the subgroup map propagation analysis and exit.">
- ];
-}
-
#endif // MLIR_DIALECT_XEGPU_TRANSFORMS_PASSES_TD
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt
index 124e904edb543..7fb64d3b97b87 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt
@@ -1,6 +1,5 @@
add_mlir_dialect_library(MLIRXeGPUTransforms
XeGPUFoldAliasOps.cpp
- XeGPUSubgroupDistribute.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/XeGPU
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
deleted file mode 100644
index 86e07697f437c..0000000000000
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ /dev/null
@@ -1,662 +0,0 @@
-//===- XeGPUSubgroupDistribute.cpp - XeGPU Subgroup Distribute Pass -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h"
-#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
-#include "mlir/Analysis/DataFlow/SparseAnalysis.h"
-#include "mlir/Analysis/DataFlowFramework.h"
-#include "mlir/Dialect/GPU/IR/GPUDialect.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/Vector/IR/VectorOps.h"
-#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
-#include "mlir/Dialect/XeGPU/Transforms/Passes.h"
-#include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/Interfaces/FunctionInterfaces.h"
-#include "llvm/ADT/TypeSwitch.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace mlir {
-namespace xegpu {
-#define GEN_PASS_DEF_XEGPUSUBGROUPDISTRIBUTE
-#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
-} // namespace xegpu
-} // namespace mlir
-
-using namespace mlir;
-using namespace mlir::dataflow;
-
-/// HW dependent constants.
-/// TODO: These constants should be queried from the target information.
-constexpr unsigned subgroupSize = 16; // How many work items in a subgroup.
-/// If DPAS A or B operands have low precision element types they must be packed
-/// according to the following sizes.
-constexpr unsigned packedSizeInBitsForDefault =
- 16; // Minimum packing size per register for DPAS A.
-constexpr unsigned packedSizeInBitsForDpasB =
- 32; // Minimum packing size per register for DPAS B.
-
-namespace {
-
-///===----------------------------------------------------------------------===///
-/// Layout
-///===----------------------------------------------------------------------===///
-
-/// Helper class to store the ND layout of work items within a subgroup and data
-/// owned by each work item.
-struct Layout {
- SmallVector<int64_t, 3> layout;
- Layout() = default;
- Layout(const Layout &other) = default;
- Layout(std::initializer_list<int64_t> list) : layout(list) {}
- void print(llvm::raw_ostream &os) const;
- size_t size() const { return layout.size(); }
- int64_t operator[](size_t idx) const;
-};
-
-void Layout::print(llvm::raw_ostream &os) const {
- os << "[";
- llvm::interleaveComma(layout, os);
- os << "]";
-}
-
-int64_t Layout::operator[](size_t idx) const {
- assert(idx < layout.size() && "Index out of bounds.");
- return layout[idx];
-}
-
-/// WiLayout represents the layout of work items within a subgroup when it
-/// accesses some value. WiData represents the layout of data owned by each work
-/// item.
-using WiLayout = Layout;
-using WiData = Layout;
-
-///===----------------------------------------------------------------------===///
-/// SGMap
-///===----------------------------------------------------------------------===///
-
-/// Helper class for tracking the analysis state of a value. For SGPropagation,
-/// the analysis state is simply the wi_layout and wi_data of each value.
-/// The purpose of this analysis is to propagate a unique layout for each value in
-/// the program starting from some known values (like DPAS, StoreNd, etc.).
-///
-/// Given this, SGMap satisfies the following properties:
-/// 1) SGMap is a lattice with two states - assigned and not assigned.
-/// 2) Two SGMap values are equal if they are both assigned or both not
-/// assigned. The concrete value of assigned state does not matter.
-/// 3) The meet operator works as follows:
-/// - If current state is assigned, return the current state. (already
-/// a unique layout is assigned. don't change it)
-/// - Otherwise, return the other state.
-
-struct SGMap {
-private:
- WiLayout wiLayout;
- WiData wiData;
-
-public:
- SGMap() = default;
- SGMap(const SGMap &other) = default;
- SGMap(const WiLayout &layout, const WiData &data)
- : wiLayout(layout), wiData(data) {}
-
- /// Two lattice values are equal if they have `some` layout. The actual
- /// content of the layout does not matter.
- bool operator==(const SGMap &other) const {
- return this->isAssigned() == other.isAssigned();
- }
-
- static SGMap meet(const SGMap &lhs, const SGMap &rhs);
-
- static SGMap join(const SGMap &lhs, const SGMap &rhs);
-
- void print(raw_ostream &os) const;
-
- bool isAssigned() const { return wiLayout.size() > 0 && wiData.size() > 0; }
-
- SGMap getTransposedLayout(ArrayRef<int64_t> permutation) const;
-
- const WiLayout &getLayout() const { return wiLayout; }
- const WiData &getData() const { return wiData; }
-};
-
-void SGMap::print(raw_ostream &os) const {
- if (isAssigned()) {
- os << "wi_layout: ";
- wiLayout.print(os);
- os << ", wi_data: ";
- wiData.print(os);
- } else
- os << "Not assigned.";
-}
-
-SGMap SGMap::meet(const SGMap &lhs, const SGMap &rhs) {
- if (!lhs.isAssigned())
- return rhs;
- return lhs;
-}
-
-/// Since this is a backward analysis, join method is not used.
-SGMap SGMap::join(const SGMap &lhs, const SGMap &rhs) {
- llvm_unreachable("Join should not be triggered by SGMapPropagation.");
-}
-
-/// Get the transposed layout according to the given permutation.
-SGMap SGMap::getTransposedLayout(ArrayRef<int64_t> permutation) const {
- if (!isAssigned())
- return {};
- WiLayout newLayout;
- WiData newData;
- for (auto idx : permutation) {
- newLayout.layout.push_back(wiLayout.layout[idx]);
- newData.layout.push_back(wiData.layout[idx]);
- }
- return SGMap(newLayout, newData);
-}
-
-///===----------------------------------------------------------------------===///
-/// SGMapLattice
-///===----------------------------------------------------------------------===///
-
-/// Lattice holding the SGMap for each value.
-struct SGMapLattice : public Lattice<SGMap> {
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SGMapLattice)
- using Lattice::Lattice;
-};
-
-/// Helper Functions to get default layouts. A `default layout` is a layout that
-/// is assigned to a value when the layout is not fixed by some anchor operation
-/// (like DPAS). This is the natural layout work items are arranged in a
-/// subgroup.
-
-/// Helper Function to get the default layout for uniform values like constants.
-/// For 1D vector, wi_layout is [subgroupSize] and wi_data is [1].
-/// For 2D vector, wi_layout is [1, subgroupSize] and wi_data is [1, 1].
-static SGMap getDefaultSgMap(unsigned rank) {
- assert((rank == 1 || rank == 2) && "Expected 1D or 2D vector.");
- if (rank == 1)
- return SGMap(WiLayout({subgroupSize}), WiData({1}));
- return SGMap(WiLayout({1, subgroupSize}), WiData({1, 1}));
-}
-
-/// Helper to get the default layout for a vector type.
-static SGMap getDefaultSgMap(VectorType vectorTy) {
- /// Expecting a 1D or 2D vector.
- assert((vectorTy.getRank() == 1 || vectorTy.getRank() == 2) &&
- "Expected 1D or 2D vector.");
- /// Expecting int or float element type.
- assert(vectorTy.getElementType().isIntOrFloat() &&
- "Expected int or float element type.");
- /// If the rank is 1, then return default layout for 1D vector.
- if (vectorTy.getRank() == 1)
- return getDefaultSgMap(1);
- /// Packing factor is determined by the element type bitwidth.
- int packingFactor = 1;
- auto bitwidth = vectorTy.getElementType().getIntOrFloatBitWidth();
- if (bitwidth < packedSizeInBitsForDefault)
- packingFactor = packedSizeInBitsForDefault / bitwidth;
- return SGMap(WiLayout({1, subgroupSize}), WiData({1, packingFactor}));
-}
-
-/// Helper Function to get the expected layouts for DPAS operands. `wi_data` is
-/// set according to the following criteria:
-/// * For A operand, the data must be packed in minimum
-/// `packedSizeInBitsForDefault`
-/// * For B operand, the data must be packed in minimum
-/// `packedSizeInBitsForDpasB`
-static SGMap getSGMapForDPASOperand(VectorType vectorTy, unsigned operandNum) {
- auto elementTy = vectorTy.getElementType();
- assert(elementTy.isIntOrFloat() &&
- "Expected int or float type in DPAS operands");
- WiLayout layout({1, subgroupSize});
- /// For B operand, data must be packed in minimum `packedSizeInBitsForDpasB` and
- /// must have the VNNI format.
- if (operandNum == 1 &&
- elementTy.getIntOrFloatBitWidth() < packedSizeInBitsForDpasB) {
- WiData data(
- {packedSizeInBitsForDpasB / elementTy.getIntOrFloatBitWidth(), 1});
- return SGMap(layout, data);
- }
- /// Otherwise, return the default layout for the vector type.
- return getDefaultSgMap(vectorTy);
-}
-
-///===----------------------------------------------------------------------===///
-/// SGMapPropagation
-///===----------------------------------------------------------------------===///
-
-/// Backward data flow analysis to propagate the wi_layout and wi_data of each
-/// value in the program. Currently, the layouts for operands DPAS, StoreNd, and
-/// StoreScatter are fixed (known before propagation). Purpose of this analysis
-/// is to propagate those known layouts to all their producers and (other)
-/// consumers.
-class SGMapPropagation : public SparseBackwardDataFlowAnalysis<SGMapLattice> {
-private:
- void visitDpasOp(xegpu::DpasOp dpas, ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results);
-
- void visitStoreNdOp(xegpu::StoreNdOp store, ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results);
-
- void visitStoreScatterOp(xegpu::StoreScatterOp storeScatter,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results);
-
- void visitLoadNdOp(xegpu::LoadNdOp load, ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results);
-
- void visitLoadGatherOp(xegpu::LoadGatherOp load,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results);
-
- void visitTransposeOp(vector::TransposeOp transpose,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results);
-
- void visitVectorBitcastOp(vector::BitCastOp bitcast,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results);
-
- void visitCreateDescOp(xegpu::CreateDescOp createDesc,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results);
-
- void visitUpdateNdOffsetOp(xegpu::UpdateNdOffsetOp updateNdOffset,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results);
-
- void visitVectorMultiReductionOp(vector::MultiDimReductionOp reduction,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results);
-
-public:
- SGMapPropagation(DataFlowSolver &solver, SymbolTableCollection &symbolTable)
- : SparseBackwardDataFlowAnalysis(solver, symbolTable) {}
- using SparseBackwardDataFlowAnalysis::SparseBackwardDataFlowAnalysis;
-
- LogicalResult visitOperation(Operation *op, ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results) override;
-
- void visitBranchOperand(OpOperand &operand) override {};
-
- void visitCallOperand(OpOperand &operand) override {};
-
- void visitExternalCall(CallOpInterface call,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results) override {};
-
- void setToExitState(SGMapLattice *lattice) override {
- (void)lattice->meet(SGMap());
- }
-};
-} // namespace
-
-LogicalResult
-SGMapPropagation::visitOperation(Operation *op,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results) {
- TypeSwitch<Operation *>(op)
- .Case<xegpu::DpasOp>(
- [&](auto dpasOp) { visitDpasOp(dpasOp, operands, results); })
- .Case<xegpu::StoreNdOp>(
- [&](auto storeNdOp) { visitStoreNdOp(storeNdOp, operands, results); })
- .Case<xegpu::StoreScatterOp>([&](auto storeScatterOp) {
- visitStoreScatterOp(storeScatterOp, operands, results);
- })
- .Case<xegpu::LoadNdOp>(
- [&](auto loadNdOp) { visitLoadNdOp(loadNdOp, operands, results); })
- .Case<xegpu::LoadGatherOp>([&](auto loadGatherOp) {
- visitLoadGatherOp(loadGatherOp, operands, results);
- })
- .Case<xegpu::CreateDescOp>([&](auto createDescOp) {
- visitCreateDescOp(createDescOp, operands, results);
- })
- .Case<xegpu::UpdateNdOffsetOp>([&](auto updateNdOffsetOp) {
- visitUpdateNdOffsetOp(updateNdOffsetOp, operands, results);
- })
- /// No need to propagate the layout to operands in CreateNdDescOp because
- /// they are scalars (offsets, sizes, etc.).
- .Case<xegpu::CreateNdDescOp>([&](auto createNdDescOp) {})
- .Case<vector::TransposeOp>([&](auto transposeOp) {
- visitTransposeOp(transposeOp, operands, results);
- })
- .Case<vector::BitCastOp>([&](auto bitcastOp) {
- visitVectorBitcastOp(bitcastOp, operands, results);
- })
- .Case<vector::MultiDimReductionOp>([&](auto reductionOp) {
- visitVectorMultiReductionOp(reductionOp, operands, results);
- })
- /// All other ops.
- .Default([&](Operation *op) {
- for (const SGMapLattice *r : results) {
- for (SGMapLattice *operand : operands) {
- /// Propagate the layout of the result to the operand.
- if (r->getValue().isAssigned())
- meet(operand, *r);
- }
- }
- });
- /// Add a dependency from each result to program point after the operation.
- for (const SGMapLattice *r : results) {
- addDependency(const_cast<SGMapLattice *>(r), getProgramPointAfter(op));
- }
- return success();
-}
-
-void SGMapPropagation::visitVectorMultiReductionOp(
- vector::MultiDimReductionOp reduction, ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results) {
- /// The layout of the result must be present.
- auto resultLayout = results[0]->getValue();
- if (!resultLayout.isAssigned())
- return;
- /// We only consider 2D -> 1D reductions at this point.
- assert(resultLayout.getLayout().size() == 1 &&
- "Expected 1D layout for reduction result.");
- /// Given that the result is 1D, the layout of the operand should be 2D with
- /// default layout.
- auto operandLayout = getDefaultSgMap(2);
- propagateIfChanged(operands[0], operands[0]->meet(operandLayout));
- /// Accumulator should have the same layout as the result.
- propagateIfChanged(operands[1], operands[1]->meet(resultLayout));
-}
-
-/// Propagate the layout of the result tensor to the source tensor descriptor in
-/// UpdateNdOffsetOp.
-void SGMapPropagation::visitUpdateNdOffsetOp(
- xegpu::UpdateNdOffsetOp updateNdOffset, ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results) {
- /// The layout of the result must be present.
- auto resultLayout = results[0]->getValue();
- if (!resultLayout.isAssigned())
- return;
- /// Propagate the layout to the source operand.
- propagateIfChanged(operands[0], operands[0]->meet(resultLayout));
-}
-
-/// Set the layouts for DPAS A, B, and C operands.
-void SGMapPropagation::visitDpasOp(xegpu::DpasOp dpas,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results) {
- auto aTy = dpas.getLhsType();
- auto bTy = dpas.getRhsType();
- propagateIfChanged(operands[0],
- operands[0]->meet(getSGMapForDPASOperand(aTy, 0)));
- propagateIfChanged(operands[1],
- operands[1]->meet(getSGMapForDPASOperand(bTy, 1)));
- if (operands.size() > 2) {
- auto cTy = dpas.getAccType();
- propagateIfChanged(operands[2],
- operands[2]->meet(getSGMapForDPASOperand(cTy, 2)));
- }
-};
-
-/// Set the layout for the value and tensor descriptor operands in StoreNdOp.
-void SGMapPropagation::visitStoreNdOp(xegpu::StoreNdOp store,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results) {
- auto storeLayout = getDefaultSgMap(store.getValueType());
- /// Both operands should have the same layout
- for (SGMapLattice *operand : operands) {
- propagateIfChanged(operand, operand->meet(storeLayout));
- }
-}
-
-/// Propagate the layout of the value to the tensor descriptor operand in
-/// LoadNdOp.
-void SGMapPropagation::visitLoadNdOp(xegpu::LoadNdOp load,
- ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results) {
- auto valueLayout = results[0]->getValue();
- /// Need the layout of the value to propagate to the tensor descriptor.
- if (!valueLayout.isAssigned())
- return;
- SGMap tensorDescLayout = valueLayout;
- /// LoadNdOp has the transpose effect. However, at the stage of this analysis
- /// this effect is not expected and should be abstracted away. Emit a warning.
- if (auto transpose = load.getTranspose()) {
- load.emitWarning("Transpose effect is not expected for LoadNdOp at "
- "SGMapPropagation stage.");
- tensorDescLayout = valueLayout.getTransposedLayout(transpose.value());
- }
- /// Propagate the new layout to the tensor descriptor operand.
- propagateIfChanged(operands[0], operands[0]->meet(tensorDescLayout));
-}
-
-/// For vector::TransposeOp, the layout of the result is transposed and
-/// propagated to the operand.
-void SGMapPropagation::visitTransposeOp(
- vector::TransposeOp transpose, ArrayRef<SGMapLattice *> operands,
- ArrayRef<const SGMapLattice *> results) {
- /// Need the layout of transpose result to propagate to the operands.
- auto resultLayout = results[0]->getValue();
- if (!resultLayout.isAssigned())
- return;
- auto newLayout = resultLayout.getTransposedLayout(transpose.getPermutation());
- /// Propagate the new layout to the vector operand.
- propagateIfChanged(operands[0], operands[0]->meet(newLayout));
-}
-
-/// For vector::BitCastOp, the wi_data of the source layout is changed based on
-/// the bit width of the source and result types.
-void SGMap...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/131364
More information about the Mlir-commits
mailing list