[Mlir-commits] [mlir] aba7d72 - [mlir][amdgpu] gfx1250+ lower fat_raw_pointer_cast (#175047)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Thu Jan 8 13:47:28 PST 2026
Author: Erick Ochoa Lopez
Date: 2026-01-08T16:47:24-05:00
New Revision: aba7d72c8da85c35c5ae4f74f6f86f06414bf93f
URL: https://github.com/llvm/llvm-project/commit/aba7d72c8da85c35c5ae4f74f6f86f06414bf93f
DIFF: https://github.com/llvm/llvm-project/commit/aba7d72c8da85c35c5ae4f74f6f86f06414bf93f.diff
LOG: [mlir][amdgpu] gfx1250+ lower fat_raw_pointer_cast (#175047)
* On gfx1250+, numRecords is set to all 1s in its low 45 bits (2^45 - 1) when bounds checking is not requested.
* On gfx1250+, the buffer resource flags word is now correctly set to zero.
Added:
Modified:
mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 31aea34625f2c..6427807e944a1 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -110,8 +110,12 @@ static Value getLinearIndexI32(ConversionPatternRewriter &rewriter,
static Value getNumRecords(ConversionPatternRewriter &rewriter, Location loc,
MemRefType memrefType,
MemRefDescriptor &memrefDescriptor,
- ArrayRef<int64_t> strides,
- int64_t elementByteWidth) {
+ ArrayRef<int64_t> strides, int64_t elementByteWidth,
+ amdgpu::Chipset chipset, bool boundsCheck) {
+ if (chipset >= kGfx1250 && !boundsCheck) {
+ constexpr int64_t first45bits = (1ll << 45) - 1;
+ return createI64Constant(rewriter, loc, first45bits);
+ }
if (memrefType.hasStaticShape() &&
!llvm::any_of(strides, ShapedType::isDynamic)) {
int64_t size = memrefType.getRank() == 0 ? 1 : 0;
@@ -156,26 +160,39 @@ static Value makeBufferRsrc(ConversionPatternRewriter &rewriter, Location loc,
stride = LLVM::ConstantOp::create(rewriter, loc, i16,
rewriter.getI16IntegerAttr(0));
}
- // Get the number of elements.
- // Flag word:
- // bits 0-11: dst sel, ignored by these intrinsics
- // bits 12-14: data format (ignored, must be nonzero, 7=float)
- // bits 15-18: data format (ignored, must be nonzero, 4=32bit)
- // bit 19: In nested heap (0 here)
- // bit 20: Behavior on unmap (0 means "return 0 / ignore")
- // bits 21-22: Index stride for swizzles (N/A)
- // bit 23: Add thread ID (0)
- // bit 24: Reserved to 1 (RDNA) or 0 (CDNA)
- // bits 25-26: Reserved (0)
- // bit 27: Buffer is non-volatile (CDNA only)
- // bits 28-29: Out of bounds select (0 = structured, 1 = check index, 2 =
- // none, 3 = either swizzles or testing against offset field) RDNA only
- // bits 30-31: Type (must be 0)
- uint32_t flags = (7 << 12) | (4 << 15);
- if (chipset.majorVersion >= 10) {
- flags |= (1 << 24);
- uint32_t oob = boundsCheck ? 3 : 2;
- flags |= (oob << 28);
+
+ uint32_t flags = 0;
+ if (chipset >= kGfx1250) {
+ // Flag word:
+ // bit 0: swizzle
+ // bit 1: 0 means (total_offset + payload > numRecords)
+ // 1 means ((total_offset + payload >) numRecords) || ((offset +
+ // payload) > stride) only applied when swizzle_enable = 0. keep at
+ // zero.
+ // whether oob is done depends on numRecords.
+ // bits 2-3: Type (must be 0)
+ } else {
+ // Get the number of elements.
+ // Flag word:
+ // bits 0-11: dst sel, ignored by these intrinsics
+ // bits 12-14: data format (ignored, must be nonzero, 7=float)
+ // bits 15-18: data format (ignored, must be nonzero, 4=32bit)
+ // bit 19: In nested heap (0 here)
+ // bit 20: Behavior on unmap (0 means "return 0 / ignore")
+ // bits 21-22: Index stride for swizzles (N/A)
+ // bit 23: Add thread ID (0)
+ // bit 24: Reserved to 1 (RDNA) or 0 (CDNA)
+ // bits 25-26: Reserved (0)
+ // bit 27: Buffer is non-volatile (CDNA only)
+ // bits 28-29: Out of bounds select (0 = structured, 1 = check index, 2 =
+ // none, 3 = either swizzles or testing against offset field) RDNA only
+ // bits 30-31: Type (must be 0)
+ flags |= (7 << 12) | (4 << 15);
+ if (chipset.majorVersion >= 10) {
+ flags |= (1 << 24);
+ uint32_t oob = boundsCheck ? 3 : 2;
+ flags |= (oob << 28);
+ }
}
Value flagsConst = createI32Constant(rewriter, loc, flags);
Type rsrcType =
@@ -214,8 +231,9 @@ struct FatRawBufferCastLowering
Value numRecords = adaptor.getValidBytes();
if (!numRecords)
- numRecords = getNumRecords(rewriter, loc, memrefType, descriptor,
- strideVals, elementByteWidth);
+ numRecords =
+ getNumRecords(rewriter, loc, memrefType, descriptor, strideVals,
+ elementByteWidth, chipset, adaptor.getBoundsCheck());
Value basePointer =
adaptor.getResetOffset()
@@ -386,8 +404,9 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
Value ptr = memrefDescriptor.bufferPtr(
rewriter, loc, *this->getTypeConverter(), memrefType);
- Value numRecords = getNumRecords(
- rewriter, loc, memrefType, memrefDescriptor, strides, elementByteWidth);
+ Value numRecords =
+ getNumRecords(rewriter, loc, memrefType, memrefDescriptor, strides,
+ elementByteWidth, chipset, adaptor.getBoundsCheck());
Value resource = makeBufferRsrc(rewriter, loc, ptr, numRecords,
adaptor.getBoundsCheck(), chipset);
args.push_back(resource);
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index 50323dc726640..e55bca4bad42f 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -4,6 +4,7 @@
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10,RDNA
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11,RDNA
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1201 | FileCheck %s --check-prefixes=CHECK,GFX12,RDNA
+// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1250 | FileCheck %s --check-prefixes=CHECK,GFX1250
// Note: #gpu.address_space<global> is hardcoded to `1` here because the
// test pass doesn't set up the GPU address space conversions.
@@ -22,6 +23,7 @@ func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu_global_addrspace>) -> me
// CHECK-DAG: %[[numRecords:.*]] = llvm.mlir.constant(32 : i64) : i64
// CHECK-DAG: %[[strideArg:.*]] = llvm.mlir.constant(0 : i16) : i16
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
// CHECK: %[[fatBuf:.*]] = rocdl.make.buffer.rsrc %[[base]], %[[strideArg]], %[[numRecords]], %[[flags]] : <1> to <7>
// CHECK: %[[ret0:.*]] = llvm.mlir.poison : !llvm.struct<(ptr<7>, ptr<7>, i64, array<1 x i64>, array<1 x i64>)>
@@ -43,6 +45,7 @@ func.func @fat_raw_buffer_cast_0d(%buf: memref<i32, #gpu_global_addrspace>) -> m
// CHECK-DAG: %[[numRecords:.*]] = llvm.mlir.constant(4 : i64) : i64
// CHECK-DAG: %[[strideArg:.*]] = llvm.mlir.constant(0 : i16) : i16
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
// CHECK: %[[fatBuf:.*]] = rocdl.make.buffer.rsrc %[[base]], %[[strideArg]], %[[numRecords]], %[[flags]]
// CHECK: %[[ret0:.*]] = llvm.mlir.poison : !llvm.struct<(ptr<7>, ptr<7>, i64)>
@@ -93,9 +96,13 @@ func.func @fat_raw_buffer_cast_valid_bytes(%buf: memref<8xi32, #gpu_global_addrs
// CHECK-LABEL: func @fat_raw_buffer_cast_bounds_check
func.func @fat_raw_buffer_cast_bounds_check(%buf: memref<8xi32, #gpu_global_addrspace>) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> {
+ // GFX9: %[[numRecords:.*]] = llvm.mlir.constant({{.*}} : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
+ // GFX1250: %[[numRecords:.*]] = llvm.mlir.constant(35184372088831 : i64)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
+ // RDNA: %[[numRecords:.*]] = llvm.mlir.constant({{.*}} : i64)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(553807872 : i32)
- // CHECK: %[[rsrc:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %{{.*}}, %[[flags]]
+ // CHECK: %[[rsrc:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
%ret = amdgpu.fat_raw_buffer_cast %buf boundsCheck(false) : memref<8xi32, #gpu_global_addrspace> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
return %ret : memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
}
@@ -104,12 +111,18 @@ func.func @fat_raw_buffer_cast_bounds_check(%buf: memref<8xi32, #gpu_global_addr
// CHECK-SAME: (%{{.*}}: memref<64x64xi32, 1>, %[[stride:.*]]: i14)
func.func @fat_raw_buffer_cast_cache_swizzle(%buf: memref<64x64xi32, #gpu_global_addrspace>, %stride: i14) -> memref<64x64xi32, #amdgpu.address_space<fat_raw_buffer>> {
// GFX908: %[[stride:.*]] = llvm.mlir.constant(0 : i16) : i16
+ // GFX908: %[[flags:.*]] = llvm.mlir.constant
// GFX90A: %[[stride:.*]] = llvm.mlir.constant(0 : i16) : i16
+ // GFX90A: %[[flags:.*]] = llvm.mlir.constant
// RDNA: %[[stride:.*]] = llvm.mlir.constant(0 : i16) : i16
+ // RDNA: %[[flags:.*]] = llvm.mlir.constant
// GFX942: %[[asI16:.*]] = llvm.zext %[[stride]] : i14 to i16
// GFX942: %[[cacheSwizzleOn:.*]] = llvm.mlir.constant(16384 : i16) : i16
// GFX942: %[[stride:.*]] = llvm.or disjoint %[[asI16]], %[[cacheSwizzleOn]]
- // CHECK: rocdl.make.buffer.rsrc %{{.*}}, %[[stride]], %{{.*}}, %{{.*}}
+ // GFX942: %[[flags:.*]] = llvm.mlir.constant
+ // GFX1250: %[[stride:.*]] = llvm.mlir.constant(0 : i16) : i16
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
+ // CHECK: rocdl.make.buffer.rsrc %{{.*}}, %[[stride]], %{{.*}}, %[[flags]]
%ret = amdgpu.fat_raw_buffer_cast %buf cacheSwizzleStride(%stride) : memref<64x64xi32, #gpu_global_addrspace> to memref<64x64xi32, #amdgpu.address_space<fat_raw_buffer>>
return %ret : memref<64x64xi32, #amdgpu.address_space<fat_raw_buffer>>
}
@@ -120,6 +133,7 @@ func.func @gpu_gcn_raw_buffer_load_scalar_i32(%buf: memref<i32>) -> i32 {
// CHECK: %[[stride:.*]] = llvm.mlir.constant(0 : i16)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %[[stride]], %[[numRecords]], %[[flags]] : !llvm.ptr to <8>
// CHECK: %[[ret:.*]] = rocdl.raw.ptr.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
// CHECK: return %[[ret]]
@@ -133,6 +147,7 @@ func.func @gpu_gcn_raw_buffer_load_i32(%buf: memref<64xi32>, %idx: i32) -> i32 {
// CHECK: %[[stride:.*]] = llvm.mlir.constant(0 : i16)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %[[stride]], %[[numRecords]], %[[flags]] : !llvm.ptr to <8>
// CHECK: %[[ret:.*]] = rocdl.raw.ptr.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
// CHECK: return %[[ret]]
@@ -179,6 +194,7 @@ func.func @gpu_gcn_raw_buffer_load_i32_strided(%buf: memref<16x16xi32, strided<[
func.func @gpu_gcn_raw_buffer_load_i32_oob_off(%buf: memref<64xi32>, %idx: i32) -> i32 {
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(553807872 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %{{.*}}, %[[flags]]
// CHECK: %[[ret:.*]] = rocdl.raw.ptr.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
// CHECK: return %[[ret]]
@@ -261,6 +277,7 @@ func.func @gpu_gcn_raw_buffer_load_4xf8E4M3FNUZ(%buf: memref<64xf8E4M3FNUZ>, %id
func.func @gpu_gcn_raw_buffer_store_scalar_i32(%value: i32, %buf: memref<i32>) {
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %{{.*}}, %[[flags]]
// CHECK: rocdl.raw.ptr.buffer.store %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
amdgpu.raw_buffer_store {boundsCheck = true} %value -> %buf[] : i32 -> memref<i32>
@@ -272,6 +289,7 @@ func.func @gpu_gcn_raw_buffer_store_i32(%value: i32, %buf: memref<64xi32>, %idx:
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
// CHECK: rocdl.raw.ptr.buffer.store %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
amdgpu.raw_buffer_store {boundsCheck = true} %value -> %buf[%idx] : i32 -> memref<64xi32>, i32
@@ -308,6 +326,7 @@ func.func @gpu_gcn_raw_buffer_atomic_fadd_f32(%value: f32, %buf: memref<64xf32>,
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
// CHECK: rocdl.raw.ptr.buffer.atomic.fadd %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : f32
amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %value -> %buf[%idx] : f32 -> memref<64xf32>, i32
@@ -319,6 +338,7 @@ func.func @gpu_gcn_raw_buffer_atomic_fadd_v2f16(%value: vector<2xf16>, %buf: mem
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(128 : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
// CHECK: rocdl.raw.ptr.buffer.atomic.fadd %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : vector<2xf16>
amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %value -> %buf[%idx] : vector<2xf16> -> memref<64xf16>, i32
@@ -330,6 +350,7 @@ func.func @gpu_gcn_raw_buffer_atomic_fadd_v2bf16(%value: vector<2xbf16>, %buf: m
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(128 : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
// CHECK: rocdl.raw.ptr.buffer.atomic.fadd %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : vector<2xbf16>
amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %value -> %buf[%idx] : vector<2xbf16> -> memref<64xbf16>, i32
@@ -341,6 +362,7 @@ func.func @gpu_gcn_raw_buffer_atomic_fmax_f32(%value: f32, %buf: memref<64xf32>,
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
// CHECK: rocdl.raw.ptr.buffer.atomic.fmax %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : f32
amdgpu.raw_buffer_atomic_fmax {boundsCheck = true} %value -> %buf[%idx] : f32 -> memref<64xf32>, i32
@@ -352,6 +374,7 @@ func.func @gpu_gcn_raw_buffer_atomic_smax_i32(%value: i32, %buf: memref<64xi32>,
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
// CHECK: rocdl.raw.ptr.buffer.atomic.smax %{{.*}} %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
amdgpu.raw_buffer_atomic_smax {boundsCheck = true} %value -> %buf[%idx] : i32 -> memref<64xi32>, i32
@@ -363,6 +386,7 @@ func.func @gpu_gcn_raw_buffer_atomic_umin_i32(%value: i32, %buf: memref<64xi32>,
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
// CHECK: rocdl.raw.ptr.buffer.atomic.umin %{{.*}} %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
amdgpu.raw_buffer_atomic_umin {boundsCheck = true} %value -> %buf[%idx] : i32 -> memref<64xi32>, i32
@@ -377,6 +401,7 @@ func.func @amdgpu_raw_buffer_atomic_cmpswap_f32(%src : f32, %cmp : f32, %buf : m
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
// CHECK: %[[dst:.*]] = rocdl.raw.ptr.buffer.atomic.cmpswap %[[srcCast]], %[[cmpCast]], %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
// CHECK: %[[dstCast:.*]] = llvm.bitcast %[[dst]] : i32 to f32
@@ -391,6 +416,7 @@ func.func @amdgpu_raw_buffer_atomic_cmpswap_i64(%src : i64, %cmp : i64, %buf : m
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(512 : i64)
// GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
// RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
+ // GFX1250: %[[flags:.*]] = llvm.mlir.constant(0 : i32)
// CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
// CHECK: %[[dst:.*]] = rocdl.raw.ptr.buffer.atomic.cmpswap %[[src]], %[[cmp]], %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i64
// CHECK: return %[[dst]]
@@ -421,6 +447,8 @@ func.func @lds_barrier() {
// GFX11-NEXT: rocdl.s.barrier
// GFX12-NEXT: rocdl.s.barrier.signal id = -1
// GFX12-NEXT: rocdl.s.barrier.wait id = -1
+ // GFX1250-NEXT: rocdl.s.barrier.signal id = -1
+ // GFX1250-NEXT: rocdl.s.barrier.wait id = -1
// CHECK-NEXT: llvm.fence syncscope("workgroup") acquire {llvm.mmra = #[[$MMRA_TAG]]}
amdgpu.lds_barrier
func.return
More information about the Mlir-commits
mailing list