[Mlir-commits] [mlir] 58ac25e - [AMDGPU] Add GatherToLDS async flag back in FoldMemRefOpsIntoGatherToLDSOp (#182364)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Fri Feb 20 05:26:13 PST 2026


Author: Zhuoran Yin
Date: 2026-02-20T08:26:08-05:00
New Revision: 58ac25e3f0dc772689dd35ece24c1e8159be523b

URL: https://github.com/llvm/llvm-project/commit/58ac25e3f0dc772689dd35ece24c1e8159be523b
DIFF: https://github.com/llvm/llvm-project/commit/58ac25e3f0dc772689dd35ece24c1e8159be523b.diff

LOG: [AMDGPU] Add GatherToLDS async flag back in FoldMemRefOpsIntoGatherToLDSOp (#182364)

I discovered that async flag on GatherToLDS op got dropped going through
the lowering pipeline so adding it back as it should.

Added: 
    

Modified: 
    mlir/lib/Dialect/AMDGPU/Transforms/FoldMemRefsOps.cpp
    mlir/test/Dialect/AMDGPU/amdgpu-fold-memrefs.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/AMDGPU/Transforms/FoldMemRefsOps.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/FoldMemRefsOps.cpp
index 865182647d127..9a6915b003df4 100644
--- a/mlir/lib/Dialect/AMDGPU/Transforms/FoldMemRefsOps.cpp
+++ b/mlir/lib/Dialect/AMDGPU/Transforms/FoldMemRefsOps.cpp
@@ -92,9 +92,9 @@ struct FoldMemRefOpsIntoGatherToLDSOp final : OpRewritePattern<GatherToLDSOp> {
       destIndices = op.getDstIndices();
     }
 
-    rewriter.replaceOpWithNewOp<GatherToLDSOp>(op, memrefSource, sourceIndices,
-                                               memrefDest, destIndices,
-                                               op.getTransferType());
+    rewriter.replaceOpWithNewOp<GatherToLDSOp>(
+        op, memrefSource, sourceIndices, memrefDest, destIndices,
+        op.getTransferType(), op.getAsync());
 
     return success();
   }

diff  --git a/mlir/test/Dialect/AMDGPU/amdgpu-fold-memrefs.mlir b/mlir/test/Dialect/AMDGPU/amdgpu-fold-memrefs.mlir
index 8ca3dd60414df..77a5e4af4c1d1 100644
--- a/mlir/test/Dialect/AMDGPU/amdgpu-fold-memrefs.mlir
+++ b/mlir/test/Dialect/AMDGPU/amdgpu-fold-memrefs.mlir
@@ -161,3 +161,25 @@ func.func @test_nop(%mem : memref<8192xf16, #amdgpu.address_space<fat_raw_buffer
     : vector<8xf16>, memref<8192xf16, #amdgpu.address_space<fat_raw_buffer>>, memref<4096xf16, #gpu_lds_addrspace>
   func.return
 }
+
+// -----
+
+#gpu_lds_addrspace = 3
+
+// CHECK: func @test_async_flag_preserved
+// CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
+func.func @test_async_flag_preserved(%offset_i: index, %offset_j: index) {
+  // CHECK: %[[LOCAL:.*]] = memref.alloc() : memref<64x64xf16, 3>
+  // CHECK: %[[MEM:.*]] = memref.alloc() : memref<64x128xf16>
+  // CHECK: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: amdgpu.gather_to_lds async %[[MEM]][%[[ARG0]], %[[ARG1]]], %[[LOCAL]][%[[C0]], %[[C0]]]
+  // CHECK-SAME: vector<8xf16>, memref<64x128xf16>, memref<64x64xf16, 3>
+
+  %alloc = memref.alloc() : memref<64x64xf16, #gpu_lds_addrspace>
+  %mem = memref.alloc() : memref<64x128xf16>
+  %subview = memref.subview %mem[0, 0][32, 64][1, 1] : memref<64x128xf16> to memref<32x64xf16, strided<[128, 1]>>
+  %c0 = arith.constant 0 : index
+  amdgpu.gather_to_lds async %subview[%offset_i, %offset_j], %alloc[%c0, %c0]
+    : vector<8xf16>, memref<32x64xf16, strided<[128, 1]>>, memref<64x64xf16, #gpu_lds_addrspace>
+  func.return
+}


        


More information about the Mlir-commits mailing list