[Mlir-commits] [mlir] [mlir][xegpu] Add SIMT distribution support for GEMM transpose B case. (PR #155517)
Jianhui Li
llvmlistbot at llvm.org
Wed Sep 3 15:53:26 PDT 2025
================
@@ -807,6 +822,138 @@ struct GpuBarrierDistribution final : public gpu::WarpDistributionPattern {
}
};
+/// Sink a memref::ExtractAlignedPointerAsIndex op feeding into yield op of an
+/// enclosing `gpu.warp_execute_on_lane_0` region. This will simply move the op
+/// outside of the warp op: the source memref of the extract op is appended to
+/// the results of a new warp op, and an equivalent extract op is created right
+/// after that new warp op.
+struct MemrefExtractAlignedPointerAsIndexDistribution final
+    : public gpu::WarpDistributionPattern {
+  using gpu::WarpDistributionPattern::WarpDistributionPattern;
+  LogicalResult matchAndRewrite(gpu::WarpExecuteOnLane0Op warpOp,
+                                PatternRewriter &rewriter) const override {
+    // Look for a warp result produced by an ExtractAlignedPointerAsIndexOp.
+    OpOperand *operand = getWarpResult(
+        warpOp, llvm::IsaPred<memref::ExtractAlignedPointerAsIndexOp>);
+    if (!operand)
+      return rewriter.notifyMatchFailure(
+          warpOp,
+          "warp result is not a memref::ExtractAlignedPointerAsIndex op");
+    auto extractOp =
+        operand->get().getDefiningOp<memref::ExtractAlignedPointerAsIndexOp>();
+    unsigned operandIdx = operand->getOperandNumber();
+    // Build a new warp op that additionally returns the extract op's source;
+    // `newRetIndices` receives the result position(s) of the appended value.
+    SmallVector<size_t> newRetIndices;
+    gpu::WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndAppendReturns(
+        rewriter, warpOp, extractOp.getSource(),
+        TypeRange{extractOp.getSource().getType()}, newRetIndices);
+    // Re-create the extract op after the new warp op, fed by the newly
+    // returned source value.
+    rewriter.setInsertionPointAfter(newWarpOp);
+    auto newExtractOp = memref::ExtractAlignedPointerAsIndexOp::create(
+        rewriter, newWarpOp.getLoc(), extractOp.getType(),
+        newWarpOp.getResult(newRetIndices[0]));
+    // Redirect all users of the matched warp result to the extract op that now
+    // lives outside the warp op.
+    Value distributedVal = newWarpOp.getResult(operandIdx);
+    rewriter.replaceAllUsesWith(distributedVal, newExtractOp.getResult());
+    return success();
+  }
+};
+
+struct VectorBitcastDistribution final : public gpu::WarpDistributionPattern {
+ using gpu::WarpDistributionPattern::WarpDistributionPattern;
+ LogicalResult matchAndRewrite(gpu::WarpExecuteOnLane0Op warpOp,
+ PatternRewriter &rewriter) const override {
+ OpOperand *operand =
+ getWarpResult(warpOp, llvm::IsaPred<vector::BitCastOp>);
+ if (!operand)
+ return rewriter.notifyMatchFailure(
+ warpOp, "warp result is not a vector::BitCast op");
+ auto bitcastOp = operand->get().getDefiningOp<vector::BitCastOp>();
+ unsigned operandIdx = operand->getOperandNumber();
+ VectorType distributedSourceType =
+ getDistVecTypeBasedOnLaneLayout(
+ xegpu::getDistributeLayoutAttr(bitcastOp.getSource()),
+ bitcastOp.getSourceVectorType())
+ .value_or(VectorType());
+ if (!distributedSourceType)
+ return rewriter.notifyMatchFailure(
+ bitcastOp, "Failed to distribute the source vector type in "
+ "vector::BitCast op");
+ VectorType distributedResultType =
+ cast<VectorType>(warpOp.getResult(operandIdx).getType());
+ if (distributedSourceType.getRank() != 2 ||
----------------
Jianhui-Li wrote:
bitcast may also be used in non-GEMM cases, where the vector may be n-D rather than 2-D.
https://github.com/llvm/llvm-project/pull/155517
More information about the Mlir-commits
mailing list