[Mlir-commits] [mlir] [mlir][AMDGPU] Make AMDGPUToROCDL conversion tests use real address spaces (PR #180661)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Feb 9 17:53:38 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-mlir-gpu
Author: Krzysztof Drewniak (krzysz00)
<details>
<summary>Changes</summary>
Since #171876, the `-convert-amdgpu-to-rocdl` pass is set up to handle GPU dialect address spaces such as `#gpu.address_space<global>`. Update the tests to use those attributes instead of hard-coded integer address spaces.
---
Patch is 29.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/180661.diff
4 Files Affected:
- (modified) mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir (+18-23)
- (modified) mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir (+15-34)
- (modified) mlir/test/Conversion/AMDGPUToROCDL/load_lds-gfx950.mlir (+8-16)
- (modified) mlir/test/Conversion/AMDGPUToROCDL/load_lds.mlir (+21-25)
``````````diff
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index e55bca4bad42f..e43ecfd01cb50 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -6,16 +6,11 @@
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1201 | FileCheck %s --check-prefixes=CHECK,GFX12,RDNA
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1250 | FileCheck %s --check-prefixes=CHECK,GFX1250
-// Note: #gpu.address_space<global> is hardcoded to `1` here because the
-// test pass doesn't set up the GPU address space conversions.
-
// CHECK: #[[$MMRA_TAG:.+]] = #llvm.mmra_tag<"amdgpu-synchronize-as":"local">
-#gpu_global_addrspace = 1
-
// CHECK-LABEL: func @fat_raw_buffer_cast
-func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu_global_addrspace>) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> {
- // CHECK: %[[desc:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<8xi32, 1> to !llvm.struct<(ptr<1>, ptr<1>, i64, array<1 x i64>, array<1 x i64>)>
+func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu.address_space<global>>) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> {
+ // CHECK: %[[desc:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<8xi32, #gpu.address_space<global>> to !llvm.struct<(ptr<1>, ptr<1>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK-DAG: %[[base:.*]] = llvm.extractvalue %[[desc]][1]
// CHECK-DAG: %[[offset:.*]] = llvm.extractvalue %[[desc]][2]
// CHECK-DAG: %[[sizes:.*]] = llvm.extractvalue %[[desc]][3]
@@ -33,13 +28,13 @@ func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu_global_addrspace>) -> me
// CHECK: %[[ret4:.*]] = llvm.insertvalue %[[sizes]], %[[ret3]][3]
// CHECK: %[[ret5:.*]] = llvm.insertvalue %[[strides]], %[[ret4]][4]
// CHECK: builtin.unrealized_conversion_cast %[[ret5]]
- %ret = amdgpu.fat_raw_buffer_cast %buf : memref<8xi32, #gpu_global_addrspace> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
+ %ret = amdgpu.fat_raw_buffer_cast %buf : memref<8xi32, #gpu.address_space<global>> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
return %ret : memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
}
// CHECK-LABEL: func @fat_raw_buffer_cast_0d
-func.func @fat_raw_buffer_cast_0d(%buf: memref<i32, #gpu_global_addrspace>) -> memref<i32, #amdgpu.address_space<fat_raw_buffer>> {
- // CHECK: %[[desc:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<i32, 1> to !llvm.struct<(ptr<1>, ptr<1>, i64)>
+func.func @fat_raw_buffer_cast_0d(%buf: memref<i32, #gpu.address_space<global>>) -> memref<i32, #amdgpu.address_space<fat_raw_buffer>> {
+ // CHECK: %[[desc:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<i32, #gpu.address_space<global>> to !llvm.struct<(ptr<1>, ptr<1>, i64)>
// CHECK-DAG: %[[base:.*]] = llvm.extractvalue %[[desc]][1]
// CHECK-DAG: %[[offset:.*]] = llvm.extractvalue %[[desc]][2]
// CHECK-DAG: %[[numRecords:.*]] = llvm.mlir.constant(4 : i64) : i64
@@ -53,12 +48,12 @@ func.func @fat_raw_buffer_cast_0d(%buf: memref<i32, #gpu_global_addrspace>) -> m
// CHECK: %[[ret2:.*]] = llvm.insertvalue %[[fatBuf]], %[[ret1]][1]
// CHECK: %[[ret3:.*]] = llvm.insertvalue %[[offset]], %[[ret2]][2]
// CHECK: builtin.unrealized_conversion_cast %[[ret3]]
- %ret = amdgpu.fat_raw_buffer_cast %buf : memref<i32, #gpu_global_addrspace> to memref<i32, #amdgpu.address_space<fat_raw_buffer>>
+ %ret = amdgpu.fat_raw_buffer_cast %buf : memref<i32, #gpu.address_space<global>> to memref<i32, #amdgpu.address_space<fat_raw_buffer>>
return %ret : memref<i32, #amdgpu.address_space<fat_raw_buffer>>
}
// CHECK-LABEL: func @fat_raw_buffer_cast_dyn_size_offset
-func.func @fat_raw_buffer_cast_dyn_size_offset(%buf: memref<?xi32, strided<[1], offset: ?>, #gpu_global_addrspace>) -> memref<?xi32, strided<[1], offset: ?>, #amdgpu.address_space<fat_raw_buffer>> {
+func.func @fat_raw_buffer_cast_dyn_size_offset(%buf: memref<?xi32, strided<[1], offset: ?>, #gpu.address_space<global>>) -> memref<?xi32, strided<[1], offset: ?>, #amdgpu.address_space<fat_raw_buffer>> {
// CHECK: %[[size0:.*]] = llvm.extractvalue %{{.*}}[3, 0]
// CHECK: %[[stride0:.*]] = llvm.extractvalue %{{.*}}[4, 0]
// CHECK: %[[maxVals:.*]] = llvm.mul %[[size0]], %[[stride0]]
@@ -67,13 +62,13 @@ func.func @fat_raw_buffer_cast_dyn_size_offset(%buf: memref<?xi32, strided<[1],
// CHECK: %[[offset:.*]] = llvm.extractvalue %{{.*}}[2]
// CHECK: rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %{{.*}}
// CHECK: llvm.insertvalue %[[offset]], %{{.*}}[2]
- %ret = amdgpu.fat_raw_buffer_cast %buf : memref<?xi32, strided<[1], offset: ?>, #gpu_global_addrspace> to memref<?xi32, strided<[1], offset: ?>, #amdgpu.address_space<fat_raw_buffer>>
+ %ret = amdgpu.fat_raw_buffer_cast %buf : memref<?xi32, strided<[1], offset: ?>, #gpu.address_space<global>> to memref<?xi32, strided<[1], offset: ?>, #amdgpu.address_space<fat_raw_buffer>>
return %ret : memref<?xi32, strided<[1], offset: ?>, #amdgpu.address_space<fat_raw_buffer>>
}
// CHECK-LABEL: func @fat_raw_buffer_cast_reset_offset
-func.func @fat_raw_buffer_cast_reset_offset(%buf: memref<?xi32, strided<[1], offset: ?>, #gpu_global_addrspace>) -> memref<?xi32, #amdgpu.address_space<fat_raw_buffer>> {
- // CHECK: %[[desc:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<?xi32, strided<[1], offset: ?>, 1> to !llvm.struct<(ptr<1>, ptr<1>, i64, array<1 x i64>, array<1 x i64>)>
+func.func @fat_raw_buffer_cast_reset_offset(%buf: memref<?xi32, strided<[1], offset: ?>, #gpu.address_space<global>>) -> memref<?xi32, #amdgpu.address_space<fat_raw_buffer>> {
+ // CHECK: %[[desc:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<?xi32, strided<[1], offset: ?>, #gpu.address_space<global>> to !llvm.struct<(ptr<1>, ptr<1>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK-DAG: %[[memRefPtr:.*]] = llvm.extractvalue %[[desc]][1]
// CHECK-DAG: %[[memRefOff:.*]] = llvm.extractvalue %[[desc]][2]
// CHECK-DAG: %[[basePtr:.*]] = llvm.getelementptr %[[memRefPtr]][%[[memRefOff]]]
@@ -81,21 +76,21 @@ func.func @fat_raw_buffer_cast_reset_offset(%buf: memref<?xi32, strided<[1], off
// CHECK: %[[fatBuf:.*]] = rocdl.make.buffer.rsrc %[[basePtr]], %{{.*}}, %{{.*}}, %{{.*}}
// CHECK: llvm.insertvalue %[[fatBuf]], %{{.*}}[1]
// CHECK: llvm.insertvalue %[[zeroOff]], %{{.*}}[2]
- %ret = amdgpu.fat_raw_buffer_cast %buf resetOffset : memref<?xi32, strided<[1], offset: ?>, #gpu_global_addrspace> to memref<?xi32, #amdgpu.address_space<fat_raw_buffer>>
+ %ret = amdgpu.fat_raw_buffer_cast %buf resetOffset : memref<?xi32, strided<[1], offset: ?>, #gpu.address_space<global>> to memref<?xi32, #amdgpu.address_space<fat_raw_buffer>>
return %ret : memref<?xi32, #amdgpu.address_space<fat_raw_buffer>>
}
// CHECK-LABEL: func @fat_raw_buffer_cast_valid_bytes
-func.func @fat_raw_buffer_cast_valid_bytes(%buf: memref<8xi32, #gpu_global_addrspace>) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> {
+func.func @fat_raw_buffer_cast_valid_bytes(%buf: memref<8xi32, #gpu.address_space<global>>) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> {
// CHECK: %[[numRecords:.*]] = arith.constant -1 : i64
// CHECK: rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %{{.*}}
%cu64_max = arith.constant -1 : i64
- %ret = amdgpu.fat_raw_buffer_cast %buf validBytes(%cu64_max) : memref<8xi32, #gpu_global_addrspace> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
+ %ret = amdgpu.fat_raw_buffer_cast %buf validBytes(%cu64_max) : memref<8xi32, #gpu.address_space<global>> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
return %ret : memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
}
// CHECK-LABEL: func @fat_raw_buffer_cast_bounds_check
-func.func @fat_raw_buffer_cast_bounds_check(%buf: memref<8xi32, #gpu_global_addrspace>) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> {
+func.func @fat_raw_buffer_cast_bounds_check(%buf: memref<8xi32, #gpu.address_space<global>>) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> {
// GFX9: %[[numRecords:.*]] = llvm.mlir.constant({{.*}} : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// GFX1250: %[[numRecords:.*]] = llvm.mlir.constant(35184372088831 : i64)
@@ -103,13 +98,13 @@ func.func @fat_raw_buffer_cast_bounds_check(%buf: memref<8xi32, #gpu_global_addr
// RDNA: %[[numRecords:.*]] = llvm.mlir.constant({{.*}} : i64)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(553807872 : i32)
// CHECK: %[[rsrc:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
- %ret = amdgpu.fat_raw_buffer_cast %buf boundsCheck(false) : memref<8xi32, #gpu_global_addrspace> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
+ %ret = amdgpu.fat_raw_buffer_cast %buf boundsCheck(false) : memref<8xi32, #gpu.address_space<global>> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
return %ret : memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
}
// CHECK-LABEL: func @fat_raw_buffer_cast_cache_swizzle
-// CHECK-SAME: (%{{.*}}: memref<64x64xi32, 1>, %[[stride:.*]]: i14)
-func.func @fat_raw_buffer_cast_cache_swizzle(%buf: memref<64x64xi32, #gpu_global_addrspace>, %stride: i14) -> memref<64x64xi32, #amdgpu.address_space<fat_raw_buffer>> {
+// CHECK-SAME: (%{{.*}}: memref<64x64xi32, #gpu.address_space<global>>, %[[stride:.*]]: i14)
+func.func @fat_raw_buffer_cast_cache_swizzle(%buf: memref<64x64xi32, #gpu.address_space<global>>, %stride: i14) -> memref<64x64xi32, #amdgpu.address_space<fat_raw_buffer>> {
// GFX908: %[[stride:.*]] = llvm.mlir.constant(0 : i16) : i16
// GFX908: %[[flags:.*]] = llvm.mlir.constant
// GFX90A: %[[stride:.*]] = llvm.mlir.constant(0 : i16) : i16
@@ -123,7 +118,7 @@ func.func @fat_raw_buffer_cast_cache_swizzle(%buf: memref<64x64xi32, #gpu_global
// GFX1250: %[[stride:.*]] = llvm.mlir.constant(0 : i16) : i16
// GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: rocdl.make.buffer.rsrc %{{.*}}, %[[stride]], %{{.*}}, %[[flags]]
- %ret = amdgpu.fat_raw_buffer_cast %buf cacheSwizzleStride(%stride) : memref<64x64xi32, #gpu_global_addrspace> to memref<64x64xi32, #amdgpu.address_space<fat_raw_buffer>>
+ %ret = amdgpu.fat_raw_buffer_cast %buf cacheSwizzleStride(%stride) : memref<64x64xi32, #gpu.address_space<global>> to memref<64x64xi32, #amdgpu.address_space<fat_raw_buffer>>
return %ret : memref<64x64xi32, #amdgpu.address_space<fat_raw_buffer>>
}
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
index 86b96ca2b4b86..986ea8d80729d 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
@@ -165,40 +165,28 @@ func.func @amdgpu.scaled_ext_packed_matrix_invalid_dst_elem_type(%v: vector<16xf
// -----
-#gpu_global_addrspace = 1
-#gpu_lds_addrspace = 3
-#amdgpu_fat_buffer_addrspace = 7
-
-func.func @amdgpu.make_dma_base.invalid_element_types(%idx: index, %mem: memref<8xi32, #gpu_global_addrspace>, %smem: memref<8xf32,#gpu_lds_addrspace>) -> (!amdgpu.tdm_base<i32>) {
+func.func @amdgpu.make_dma_base.invalid_element_types(%idx: index, %mem: memref<8xi32, #gpu.address_space<global>>, %smem: memref<8xf32,#gpu.address_space<workgroup>>) -> (!amdgpu.tdm_base<i32>) {
// expected-error@+1 {{'amdgpu.make_dma_base' op failed to verify that all of {global, lds} have same element type}}
- %0 = amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32, #gpu_global_addrspace>, memref<8xf32, #gpu_lds_addrspace> -> !amdgpu.tdm_base<i32>
+ %0 = amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32, #gpu.address_space<global>>, memref<8xf32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
return %0 : !amdgpu.tdm_base<i32>
}
// -----
-#gpu_global_addrspace = 1
-#gpu_lds_addrspace = 3
-#amdgpu_fat_buffer_addrspace = 7
-
-func.func @amdgpu.make_dma_base.invalid_element_types(%idx: index, %mem: memref<8xi7, #gpu_global_addrspace>, %smem: memref<8xi7,#gpu_lds_addrspace>) -> (!amdgpu.tdm_base<i7>) {
+func.func @amdgpu.make_dma_base.invalid_element_types(%idx: index, %mem: memref<8xi7, #gpu.address_space<global>>, %smem: memref<8xi7,#gpu.address_space<workgroup>>) -> (!amdgpu.tdm_base<i7>) {
// expected-error@+1 {{'amdgpu.make_dma_base' op element type must be 1, 2, 4, or 8 bytes long but type was 7 bits long.}}
- %0 = amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi7, #gpu_global_addrspace>, memref<8xi7, #gpu_lds_addrspace> -> !amdgpu.tdm_base<i7>
+ %0 = amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi7, #gpu.address_space<global>>, memref<8xi7, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i7>
return %0 : !amdgpu.tdm_base<i7>
}
// -----
-#gpu_global_addrspace = 1
-#gpu_lds_addrspace = 3
-#amdgpu_fat_buffer_addrspace = 7
-
// CHECK-LABEL: func @make_dma_base
-// CHECK-SAME: (%[[IDX:.+]]: index, %[[MEM:.+]]: memref<8xi32, 1>, %[[SMEM:.+]]: memref<8xi32, 3>)
-func.func @make_dma_base(%idx: index, %mem: memref<8xi32, #gpu_global_addrspace>, %smem: memref<8xi32,#gpu_lds_addrspace>) -> (!amdgpu.tdm_base<i32>) {
+// CHECK-SAME: (%[[IDX:.+]]: index, %[[MEM:.+]]: memref<8xi32, #gpu.address_space<global>>, %[[SMEM:.+]]: memref<8xi32, #gpu.address_space<workgroup>>)
+func.func @make_dma_base(%idx: index, %mem: memref<8xi32, #gpu.address_space<global>>, %smem: memref<8xi32,#gpu.address_space<workgroup>>) -> (!amdgpu.tdm_base<i32>) {
// CHECK-DAG: %[[INT:.+]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64
- // CHECK-DAG: %[[MEMREF_DESC_MEM:.+]] = builtin.unrealized_conversion_cast %[[MEM]] : memref<8xi32, 1>
- // CHECK-DAG: %[[MEMREF_DESC_SMEM:.+]] = builtin.unrealized_conversion_cast %[[SMEM]] : memref<8xi32, 3>
+ // CHECK-DAG: %[[MEMREF_DESC_MEM:.+]] = builtin.unrealized_conversion_cast %[[MEM]] : memref<8xi32, #gpu.address_space<global>>
+ // CHECK-DAG: %[[MEMREF_DESC_SMEM:.+]] = builtin.unrealized_conversion_cast %[[SMEM]] : memref<8xi32, #gpu.address_space<workgroup>>
// CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(0 : i32) : i32
// CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(1 : i32) : i32
@@ -231,19 +219,16 @@ func.func @make_dma_base(%idx: index, %mem: memref<8xi32, #gpu_global_addrspace>
// CHECK: %[[V4I32_0_3:.+]] = llvm.insertelement %[[MEM_INT_LOW]], %[[V4I32_0_2]][%[[C2]] : i32]
// CHECK: %[[V4I32_0_4:.+]] = llvm.insertelement %[[MEM_INT_HIGH_TYPE]], %[[V4I32_0_3]][%[[C3]] : i32]
- %0 = amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32, #gpu_global_addrspace>, memref<8xi32, #gpu_lds_addrspace> -> !amdgpu.tdm_base<i32>
+ %0 = amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32, #gpu.address_space<global>>, memref<8xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
func.return %0 : !amdgpu.tdm_base<i32>
}
// -----
-#gpu_global_addrspace = 1
-#gpu_lds_addrspace = 3
-
// CHECK-LABEL: func @make_gather_dma_base
-// CHECK-SAME: (%[[IDX:.+]]: index, %[[MEM:.+]]: memref<8xi32, 1>, %[[SMEM:.+]]: memref<8xi32, 3>)
-func.func @make_gather_dma_base(%idx: index, %mem: memref<8xi32, #gpu_global_addrspace>, %smem: memref<8xi32,#gpu_lds_addrspace>) -> (!amdgpu.tdm_gather_base<i32, i16>, !amdgpu.tdm_gather_base<i32, i32>) {
+// CHECK-SAME: (%[[IDX:.+]]: index, %[[MEM:.+]]: memref<8xi32, #gpu.address_space<global>>, %[[SMEM:.+]]: memref<8xi32, #gpu.address_space<workgroup>>)
+func.func @make_gather_dma_base(%idx: index, %mem: memref<8xi32, #gpu.address_space<global>>, %smem: memref<8xi32,#gpu.address_space<workgroup>>) -> (!amdgpu.tdm_gather_base<i32, i16>, !amdgpu.tdm_gather_base<i32, i32>) {
// CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(0 : i32) : i32
// CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(1 : i32) : i32
@@ -257,7 +242,7 @@ func.func @make_gather_dma_base(%idx: index, %mem: memref<8xi32, #gpu_global_add
// CHECK: %[[V4I32_0_0:.+]] = llvm.mlir.poison : vector<4xi32>
// CHECK: %[[V4I32_0_1:.+]] = llvm.insertelement %[[SGPR0]], %[[V4I32_0_0]][%[[C0]] : i32]
- %0 = amdgpu.make_gather_dma_base %mem[%idx], %smem[%idx] : memref<8xi32, #gpu_global_addrspace>, memref<8xi32, #gpu_lds_addrspace> -> !amdgpu.tdm_gather_base<i32, i16>
+ %0 = amdgpu.make_gather_dma_base %mem[%idx], %smem[%idx] : memref<8xi32, #gpu.address_space<global>>, memref<8xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_gather_base<i32, i16>
// CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(0 : i32) : i32
// CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(1 : i32) : i32
@@ -276,7 +261,7 @@ func.func @make_gather_dma_base(%idx: index, %mem: memref<8xi32, #gpu_global_add
// CHECK: %[[V4I32_0_1:.+]] = llvm.insertelement %[[SGPR0]], %[[V4I32_0_0]][%[[C0]] : i32]
- %1 = amdgpu.make_gather_dma_base %mem[%idx], %smem[%idx] : memref<8xi32, #gpu_global_addrspace>, memref<8xi32, #gpu_lds_addrspace> -> !amdgpu.tdm_gather_base<i32, i32>
+ %1 = amdgpu.make_gather_dma_base %mem[%idx], %smem[%idx] : memref<8xi32, #gpu.address_space<global>>, memref<8xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_gather_base<i32, i32>
func.return %0, %1 : !amdgpu.tdm_gather_base<i32,i16>, !amdgpu.tdm_gather_base<i32,i32>
}
@@ -354,13 +339,9 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>) -> !amdgpu.tdm_desc
// -----
-#gpu_global_addrspace = 1
-#gpu_lds_addrspace = 3
-#amdgpu_fat_buffer_addrspace = 7
-
// CHECK-LABEL: func @make_dma_descriptor_atomic_barrier
// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>, %[[BARRIER:.+]]: {{.*}}, %[[IDX:.+]]: index)
-func.func @make_dma_descriptor_atomic_barrier(%base: !amdgpu.tdm_base<i32>, %barrier : memref<2x!amdgpu.ds_barrier_state, #gpu_lds_addrspace>, %idx: index) -> !amdgpu.tdm_descriptor {
+func.func @make_dma_descriptor_atomic_barrier(%base: !amdgpu.tdm_base<i32>, %barrier : memref<2x!amdgpu.ds_barrier_state, #gpu.address_space<workgroup>>, %idx: index) -> !amdgpu.tdm_descriptor {
// CHECK-DAG: %[[INDEX:.+]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64
// CHECK-DAG: %[[BARRIER_MEMREF_DESC:.+]] = builtin.unrealized_conversion_cast %[[BARRIER]]
// CHECK-DAG: %[[DGROUP0:.+]] = builtin.unrealized_conversion_cast %[[BASE]]
@@ -403,7 +384,7 @@ func.func @make_dma_descriptor_atomic_barrier(%base: !amdgpu.tdm_base<i32>, %bar
%descriptor = amdgpu.make_dma_descriptor %base globalSize [128, 64]
globalStride [64, 1]
sharedSize [128, 64]
- atomicBarrier(%barrier[%idx] : memref<2x!amdgpu.ds_barrier_state, #gpu_lds_addrspace>)
+ atomicBarrier(%barrier[%idx] : memref<2x!amdgpu.ds_barrier_state, #gpu.address_space<workgroup>>)
: !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
func.return %descriptor : !amdgpu.tdm_descriptor
}
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/load_lds-gfx950.mlir b/mlir/test/Conversion/AMDGPUToROCDL/load_lds-gfx950.mlir
index 42fb18006bea4..5bbbf8405105e 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/load_lds-gfx950.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/load_lds-gfx950.mlir
@@ -1,17 +1,13 @@
// RUN: not mlir-opt %s --split-input-file -convert-amdgpu-to-rocdl=chipset=gfx942 2>&1 | FileCheck %s --check-prefix=GFX942
// RUN: mlir-opt %s --split-input-file -convert-amdgpu-to-rocdl=chipset=gfx950 | FileCheck %s --check-prefix=GFX950
-#gpu_global_addrspace = 1
-#gpu_lds_addrspace = 3
-#amdgpu_fat_buffer_addrspace = 7
-
// GFX950-LABEL: func @fat_buffer_load_to_rocdl_f96
-// GFX950-SAME: (%[[ARG0:.*]]: memref<128x72xf32, 7>)
-func.func @fat_buffer_load_to_rocdl_f96(%global : memref<128x72xf32, #amdgpu_fat_buffer_addrspace>) {
+// GFX950-SAME: (%[[ARG0:.*]]: memref<128x72xf32, #amdgpu.address_space<fat_raw_buffer>>)
+func.func @fat_buffer_load_to_rocdl_f96(%global : memref<128x72xf32, #amdgpu.address_space<fat_raw_buffer>>) {
%c0 = arith.constant 0 : index
%c12 = arith.constant 12 : index
%c32 = arith.constant 32 : index
- %alloc = memref.alloc() : memref<64x64xf32, #gpu_lds_addrspace>
+ %alloc = memref.alloc() : memref<64x64xf32, #gpu.address_space<workgroup>>
// GFX950: %[[BUFFER_DESC:.*]] = builtin.unrealized_conversion_cast %[[ARG0]]
// GFX950: %[[C0:.*]] = arith.constant 0 : index
@@ -40,23 +36,19 @@ func.func @fat_buffer_load_to_rocdl_f96(%global : memref<128x72xf32, #amdgpu_fat
// GFX950: rocdl.load.to.lds %[[GLOBAL...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/180661
More information about the Mlir-commits mailing list