[Mlir-commits] [mlir] [MLIR][AMDGPU] Add a wrapper for global LDS load intrinsics in AMDGPU (PR #133498)
Krzysztof Drewniak
llvmlistbot at llvm.org
Tue Apr 1 16:42:30 PDT 2025
================
@@ -903,6 +903,81 @@ struct WMMAOpLowering : public ConvertOpToLLVMPattern<WMMAOp> {
}
};
+struct GlobalLoadLDSOpLowering
+ : public ConvertOpToLLVMPattern<GlobalLoadLDSOp> {
+ GlobalLoadLDSOpLowering(const LLVMTypeConverter &converter, Chipset chipset)
+ : ConvertOpToLLVMPattern<GlobalLoadLDSOp>(converter), chipset(chipset) {}
+
+ Chipset chipset;
+
+ LogicalResult
+ matchAndRewrite(GlobalLoadLDSOp op, GlobalLoadLDSOpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ Location loc = op.getLoc();
+
+ auto elemType = cast<MemRefType>(op.getDst().getType()).getElementType();
+ size_t elemSizeInBits = elemType.getIntOrFloatBitWidth();
+ if (elemSizeInBits % 8 != 0)
+ return op.emitOpError("element size must be a multiple of 8");
+
+ // TODO: instead of only transferring one element per thread, we could
+ // augment it to transfer multiple elements per thread by issuing multiple
+ // `global_load_lds` instructions.
+ auto loadWidth = elemSizeInBits / 8;
+
+ const Chipset GlobalLoadEnabled{9, 0x4, 0x0};
----------------
krzysz00 wrote:
Use `kGfx942` from above here instead, especially since gfx940 doesn't exist.
https://github.com/llvm/llvm-project/pull/133498
More information about the Mlir-commits
mailing list