[Mlir-commits] [mlir] [MLIR][AMDGPU] Add a wrapper for global LDS load intrinsics in AMDGPU (PR #133498)

Wed Apr 2 17:53:06 PDT 2025

================
@@ -903,6 +903,66 @@ struct WMMAOpLowering : public ConvertOpToLLVMPattern<WMMAOp> {
   }
 };
 
+struct GatherToLDSOpLowering : public ConvertOpToLLVMPattern<GatherToLDSOp> {
+  GatherToLDSOpLowering(const LLVMTypeConverter &converter, Chipset chipset)
+      : ConvertOpToLLVMPattern<GatherToLDSOp>(converter), chipset(chipset) {}
+
+  Chipset chipset;
+
+  LogicalResult
+  matchAndRewrite(GatherToLDSOp op, GatherToLDSOpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    if (chipset < kGfx942)
+      return op.emitOpError("chipset not supported");
+
+    Location loc = op.getLoc();
+
+    auto srcMemRefType = cast<MemRefType>(op.getSrc().getType());
+    auto dstMemRefType = cast<MemRefType>(op.getSrc().getType());
+
+    // TODO: instead of only transferring one element per thread, we could
+    // augment it to transfer multiple elements per thread by issuing multiple
+    // `global_load_lds` instructions.
+    size_t loadWidth;
+    Type transferType = op.getTransferType();
+    if (auto transferVectorType = dyn_cast<VectorType>(transferType))
+      loadWidth = transferVectorType.getNumElements() *
+                  transferVectorType.getElementTypeBitWidth() / 8;
+    else
+      loadWidth = transferType.getIntOrFloatBitWidth() / 8;
+
+    // Currently only 1, 2, and 4 byte loads are supported.
+    if (loadWidth != 1 && loadWidth != 2 && loadWidth != 4)
+      return op.emitOpError("chipset unsupported element size");
+
+    auto convertIndices = [&](ValueRange indices) -> SmallVector<Value, 4> {
+      SmallVector<Value, 4> convertedIndices;
+
+      for (Value index : indices) {
+        Type convertedType = getTypeConverter()->convertType(index.getType());
+        auto convertedIndex = rewriter.create<LLVM::ConstantOp>(
----------------
lialan wrote:

Actually I was wrong! I have fixed this.

https://github.com/llvm/llvm-project/pull/133498