[Mlir-commits] [mlir] [MLIR][AMDGPU] Add amdgpu.global_transpose_load op for RDNA4 global memory transpose loads (PR #195287)
Krzysztof Drewniak
llvmlistbot at llvm.org
Fri May 1 09:51:02 PDT 2026
================
@@ -2226,6 +2226,66 @@ struct TransposeLoadOpLowering
}
};
+struct GlobalTransposeLoadOpLowering
+ : public ConvertOpToLLVMPattern<GlobalTransposeLoadOp> {
+ GlobalTransposeLoadOpLowering(const LLVMTypeConverter &converter,
+ Chipset chipset)
+ : ConvertOpToLLVMPattern<GlobalTransposeLoadOp>(converter),
+ chipset(chipset) {}
+
+ Chipset chipset;
+
+ LogicalResult
+ matchAndRewrite(GlobalTransposeLoadOp op,
+ GlobalTransposeLoadOpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ if (chipset < kGfx1250)
+ return op.emitOpError(
+ "global_transpose_load is only supported on gfx1250+");
+
+ Location loc = op.getLoc();
+ auto srcMemRefType = cast<MemRefType>(op.getSrc().getType());
+ auto resultType = cast<VectorType>(op.getResult().getType());
+
+ Value srcPtr =
+ getStridedElementPtr(rewriter, loc, srcMemRefType, adaptor.getSrc(),
+ adaptor.getSrcIndices());
----------------
krzysz00 wrote:
Add the noise that marks this as inbounds, so that we get `gep nuw inbounds`.
https://github.com/llvm/llvm-project/pull/195287
More information about the Mlir-commits mailing list