[Mlir-commits] [mlir] [MLIR][AMDGPU] Add a wrapper for global LDS load intrinsics in AMDGPU (PR #133498)
Alan Li
llvmlistbot at llvm.org
Wed Apr 2 17:53:06 PDT 2025
================
@@ -903,6 +903,66 @@ struct WMMAOpLowering : public ConvertOpToLLVMPattern<WMMAOp> {
}
};
+struct GatherToLDSOpLowering : public ConvertOpToLLVMPattern<GatherToLDSOp> {
+ GatherToLDSOpLowering(const LLVMTypeConverter &converter, Chipset chipset)
+ : ConvertOpToLLVMPattern<GatherToLDSOp>(converter), chipset(chipset) {}
+
+ Chipset chipset;
+
+ LogicalResult
+ matchAndRewrite(GatherToLDSOp op, GatherToLDSOpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ if (chipset < kGfx942)
+ return op.emitOpError("chipset not supported");
+
+ Location loc = op.getLoc();
+
+ auto srcMemRefType = cast<MemRefType>(op.getSrc().getType());
+ auto dstMemRefType = cast<MemRefType>(op.getSrc().getType());
+
+ // TODO: instead of only transferring one element per thread, we could
+ // augment it to transfer multiple elements per thread by issuing multiple
+ // `global_load_lds` instructions.
+ size_t loadWidth;
+ Type transferType = op.getTransferType();
+ if (auto transferVectorType = dyn_cast<VectorType>(transferType))
+ loadWidth = transferVectorType.getNumElements() *
+ transferVectorType.getElementTypeBitWidth() / 8;
+ else
+ loadWidth = transferType.getIntOrFloatBitWidth() / 8;
+
+ // Currently only 1, 2, and 4 byte loads are supported.
+ if (loadWidth != 1 && loadWidth != 2 && loadWidth != 4)
+ return op.emitOpError("chipset unsupported element size");
+
+ auto convertIndices = [&](ValueRange indices) -> SmallVector<Value, 4> {
+ SmallVector<Value, 4> convertedIndices;
+
+ for (Value index : indices) {
+ Type convertedType = getTypeConverter()->convertType(index.getType());
+ auto convertedIndex = rewriter.create<LLVM::ConstantOp>(
----------------
lialan wrote:
Actually I was wrong! I have fixed this.
https://github.com/llvm/llvm-project/pull/133498
More information about the Mlir-commits
mailing list