[Mlir-commits] [mlir] [MLIR] [XeGPU] Add distribution patterns for vector transpose, bitcast & mask ops in sg to wi pass (PR #187392)
Jianhui Li
llvmlistbot at llvm.org
Wed Mar 25 15:57:38 PDT 2026
================
@@ -687,6 +688,148 @@ struct SgToWiLoadMatrix : public OpConversionPattern<xegpu::LoadMatrixOp> {
}
};
+/// Distributes a subgroup-level vector.transpose op to workitem-level.
+struct SgToWiVectorTranspose : public OpConversionPattern<vector::TransposeOp> {
+ using OpConversionPattern<vector::TransposeOp>::OpConversionPattern;
+
+ LogicalResult
+ matchAndRewrite(vector::TransposeOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ xegpu::DistributeLayoutAttr sourceLayout =
+ xegpu::getTemporaryLayout(op->getOpOperand(0));
+ xegpu::DistributeLayoutAttr resultLayout =
+ xegpu::getTemporaryLayout(op->getOpResult(0));
+ if (!sourceLayout || !resultLayout)
+ return rewriter.notifyMatchFailure(
+ op, "the source or result vector of the transpose op lacks layout "
+ "attribute");
+ ArrayRef<int64_t> perm = op.getPermutation();
+ // Result layout must be a transpose of source layout.
+ if (!resultLayout.isTransposeOf(sourceLayout, perm,
+ xegpu::LayoutKind::Lane))
+ return rewriter.notifyMatchFailure(
+ op, "the source or result vector layouts must be transposes of "
+ "each other");
+ FailureOr<VectorType> distributedResultTypeOrFailure =
+ getDistVecTypeBasedOnLaneLayout(resultLayout, op.getResultVectorType());
+ if (failed(distributedResultTypeOrFailure))
+ return rewriter.notifyMatchFailure(
+ op, "Failed to distribute the result vector type in "
+ "vector::Transpose op");
+ auto newOp = vector::TransposeOp::create(rewriter, op.getLoc(),
+ adaptor.getVector(), perm);
+ rewriter.replaceOp(op, castValueTo(rewriter, newOp.getResult(),
+ distributedResultTypeOrFailure.value()));
+ return success();
+ }
+};
+
+/// Distributes a subgroup-level vector.bitcast op to workitem-level.
+/// Bitcast only impacts the innermost dimension of the source/result vectors.
+struct SgToWiVectorBitcast : public OpConversionPattern<vector::BitCastOp> {
+ using OpConversionPattern<vector::BitCastOp>::OpConversionPattern;
+
+ LogicalResult
+ matchAndRewrite(vector::BitCastOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ xegpu::DistributeLayoutAttr resultLayout =
+ xegpu::getTemporaryLayout(op->getOpResult(0));
+ if (!resultLayout)
+ return rewriter.notifyMatchFailure(
+ op, "result vector of the bitcast op lacks layout attribute");
+ FailureOr<VectorType> distributedResultTypeOrFailure =
+ getDistVecTypeBasedOnLaneLayout(resultLayout, op.getResultVectorType());
+ if (failed(distributedResultTypeOrFailure))
+ return rewriter.notifyMatchFailure(
+ op, "Failed to distribute the result vector type in "
+ "vector::BitCast op");
+ auto newOp = vector::BitCastOp::create(
+ rewriter, op.getLoc(), distributedResultTypeOrFailure.value(),
+ adaptor.getSource());
+ rewriter.replaceOp(op, newOp.getResult());
+ return success();
+ }
+};
+
+/// Distributes a subgroup-level vector.create_mask or vector.constant_mask op
+/// to workitem-level. Each lane computes its own mask bounds based on its
+/// lane coordinates. For each dimension i, the new mask bound is:
+/// new_bound[i] = original_bound[i] - lane_coord[i] * dist_shape[i]
----------------
Jianhui-Li wrote:
>> new_bound[i] = original_bound[i] - lane_coord[i] * dist_shape[i]
Is dist_shape the same as lane_data here? If so, I suggest replacing it with lane_data.
>> vector.create_mask implicitly clamps to [0, vector_size].
What does vector_size refer to here?
https://github.com/llvm/llvm-project/pull/187392
More information about the Mlir-commits
mailing list