[Mlir-commits] [mlir] [MLIR][AMDGPU] Add a wrapper for global LDS load intrinsics in AMDGPU (PR #133498)
Alan Li
llvmlistbot at llvm.org
Mon Apr 7 13:48:35 PDT 2025
================
@@ -1010,6 +1010,53 @@ struct WMMAOpLowering : public ConvertOpToLLVMPattern<WMMAOp> {
}
};
+struct GatherToLDSOpLowering : public ConvertOpToLLVMPattern<GatherToLDSOp> {
+ GatherToLDSOpLowering(const LLVMTypeConverter &converter, Chipset chipset)
+ : ConvertOpToLLVMPattern<GatherToLDSOp>(converter), chipset(chipset) {}
+
+ Chipset chipset;
+
+ LogicalResult
+ matchAndRewrite(GatherToLDSOp op, GatherToLDSOpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ if (chipset < kGfx942)
+ return op.emitOpError("chipset not supported");
+
+ Location loc = op.getLoc();
+
+ auto srcMemRefType = cast<MemRefType>(op.getSrc().getType());
+ auto dstMemRefType = cast<MemRefType>(op.getSrc().getType());
+
+ // TODO: instead of only transferring one element per thread, we could
+ // augment it to transfer multiple elements per thread by issuing multiple
+ // `global_load_lds` instructions.
+ size_t loadWidth;
----------------
lialan wrote:
good suggestion. updated.
https://github.com/llvm/llvm-project/pull/133498
More information about the Mlir-commits mailing list