[Mlir-commits] [mlir] 584f643 - [MLIR][AMDGPU][ROCDL] Adding raw.buffer.atomic.fmax/smax/umin support
Krzysztof Drewniak
llvmlistbot at llvm.org
Tue Feb 28 08:58:41 PST 2023
Author: Manupa Karunaratne
Date: 2023-02-28T16:58:35Z
New Revision: 584f64365a4324f8ddbe4811cb1dc455c8d4297b
URL: https://github.com/llvm/llvm-project/commit/584f64365a4324f8ddbe4811cb1dc455c8d4297b
DIFF: https://github.com/llvm/llvm-project/commit/584f64365a4324f8ddbe4811cb1dc455c8d4297b.diff
LOG: [MLIR][AMDGPU][ROCDL] Adding raw.buffer.atomic.fmax/smax/umin support
This commit adds atomic fmax/smax/umin support
to the AMDGPU dialect and to the dialects it depends on, to allow such
a lowering.
Reviewed By: krzysz00
Differential Revision: https://reviews.llvm.org/D144097
Added:
Modified:
mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
mlir/test/Dialect/LLVMIR/rocdl.mlir
mlir/test/Target/LLVMIR/rocdl.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
index 6a113443eff1b..92ded941aa7c0 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
@@ -175,6 +175,120 @@ def AMDGPU_RawBufferAtomicFaddOp :
let hasVerifier = 1;
}
+// Raw buffer atomic floating point max
+def AMDGPU_RawBufferAtomicFmaxOp :
+ AMDGPU_Op<"raw_buffer_atomic_fmax", [AllElementTypesMatch<["value", "memref"]>,
+ AttrSizedOperandSegments]>,
+ Arguments<(ins F32:$value,
+ Arg<AnyMemRef, "buffer to operate on", [MemRead, MemWrite]>:$memref,
+ Variadic<I32>:$indices,
+ DefaultValuedAttr<BoolAttr, "true">:$boundsCheck,
+ OptionalAttr<I32Attr>:$indexOffset,
+ Optional<I32>:$sgprOffset)> {
+
+ let summary = "Raw Buffer Floating-point Atomic Max (non-GFX9)";
+ let description = [{
+ The `amdgpu.raw_buffer_atomic_fmax` op is a wrapper around the
+ buffer-based atomic floating point max available on AMD GPUs (except GFX9).
+
+ The index into the buffer is computed as for `memref.store` with the addition
+ of `indexOffset` (which is used to aid in emitting vectorized code) and,
+ if present `sgprOffset` (which is added after bounds checks and includes
+ any non-zero offset on the memref type).
+
+ All indexing components are given in terms of the memref's element size, not
+ the byte lengths required by the intrinsic.
+
+ Out of bounds atomic operations are ignored in hardware.
+
+ See `amdgpu.raw_buffer_load` for a description of how the underlying
+ instruction is constructed.
+ }];
+ let assemblyFormat = [{
+ attr-dict $value `->` $memref `[` $indices `]`
+ (`sgprOffset` $sgprOffset^)? `:`
+ type($value) `->` type($memref) `,` type($indices)
+ }];
+ let hasCanonicalizer = 1;
+ let hasVerifier = 1;
+}
+
+// Raw buffer atomic signed integer max
+def AMDGPU_RawBufferAtomicSmaxOp :
+ AMDGPU_Op<"raw_buffer_atomic_smax", [
+ AttrSizedOperandSegments]>,
+ Arguments<(ins I32:$value,
+ Arg<AnyMemRef, "buffer to operate on", [MemRead, MemWrite]>:$memref,
+ Variadic<I32>:$indices,
+ DefaultValuedAttr<BoolAttr, "true">:$boundsCheck,
+ OptionalAttr<I32Attr>:$indexOffset,
+ Optional<I32>:$sgprOffset)> {
+
+ let summary = "Raw Buffer Signed Integer Atomic Max";
+ let description = [{
+ The `amdgpu.raw_buffer_atomic_smax` op is a wrapper around the
+ buffer-based atomic signed integer max available on AMD GPUs.
+
+ The index into the buffer is computed as for `memref.store` with the addition
+ of `indexOffset` (which is used to aid in emitting vectorized code) and,
+ if present `sgprOffset` (which is added after bounds checks and includes
+ any non-zero offset on the memref type).
+
+ All indexing components are given in terms of the memref's element size, not
+ the byte lengths required by the intrinsic.
+
+ Out of bounds atomic operations are ignored in hardware.
+
+ See `amdgpu.raw_buffer_load` for a description of how the underlying
+ instruction is constructed.
+ }];
+ let assemblyFormat = [{
+ attr-dict $value `->` $memref `[` $indices `]`
+ (`sgprOffset` $sgprOffset^)? `:`
+ type($value) `->` type($memref) `,` type($indices)
+ }];
+ let hasCanonicalizer = 1;
+ let hasVerifier = 1;
+}
+
+// Raw buffer atomic unsigned integer min
+def AMDGPU_RawBufferAtomicUminOp :
+ AMDGPU_Op<"raw_buffer_atomic_umin", [
+ AttrSizedOperandSegments]>,
+ Arguments<(ins I32:$value,
+ Arg<AnyMemRef, "buffer to operate on", [MemRead, MemWrite]>:$memref,
+ Variadic<I32>:$indices,
+ DefaultValuedAttr<BoolAttr, "true">:$boundsCheck,
+ OptionalAttr<I32Attr>:$indexOffset,
+ Optional<I32>:$sgprOffset)> {
+
+ let summary = "Raw Buffer Unsigned Integer Atomic Min";
+ let description = [{
+ The `amdgpu.raw_buffer_atomic_umin` op is a wrapper around the
+ buffer-based atomic unsigned integer min available on AMD GPUs.
+
+ The index into the buffer is computed as for `memref.store` with the addition
+ of `indexOffset` (which is used to aid in emitting vectorized code) and,
+ if present `sgprOffset` (which is added after bounds checks and includes
+ any non-zero offset on the memref type).
+
+ All indexing components are given in terms of the memref's element size, not
+ the byte lengths required by the intrinsic.
+
+ Out of bounds atomic operations are ignored in hardware.
+
+ See `amdgpu.raw_buffer_load` for a description of how the underlying
+ instruction is constructed.
+ }];
+ let assemblyFormat = [{
+ attr-dict $value `->` $memref `[` $indices `]`
+ (`sgprOffset` $sgprOffset^)? `:`
+ type($value) `->` type($memref) `,` type($indices)
+ }];
+ let hasCanonicalizer = 1;
+ let hasVerifier = 1;
+}
+
def AMDGPU_LDSBarrierOp : AMDGPU_Op<"lds_barrier"> {
let summary = "Barrier that includes a wait for LDS memory operations.";
let description = [{
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index c3bea1cfb1ba5..470fdfef4f1ee 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -271,4 +271,61 @@ def ROCDL_RawBufferAtomicFAddOp :
let hasCustomAssemblyFormat = 1;
}
+//===---------------------------------------------------------------------===//
+// Buffer atomic floating point max intrinsic. GFX9 does not support fp32.
+
+def ROCDL_RawBufferAtomicFMaxOp :
+ ROCDL_Op<"raw.buffer.atomic.fmax">,
+ Arguments<(ins LLVM_Type:$vdata,
+ LLVM_Type:$rsrc,
+ LLVM_Type:$offset,
+ LLVM_Type:$soffset,
+ LLVM_Type:$aux)>{
+ string llvmBuilder = [{
+ auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
+ createIntrinsicCall(builder,
+ llvm::Intrinsic::amdgcn_raw_buffer_atomic_fmax, {$vdata, $rsrc,
+ $offset, $soffset, $aux}, {vdataType});
+ }];
+ let hasCustomAssemblyFormat = 1;
+}
+
+//===---------------------------------------------------------------------===//
+// Buffer atomic signed integer max intrinsic.
+
+def ROCDL_RawBufferAtomicSMaxOp :
+ ROCDL_Op<"raw.buffer.atomic.smax">,
+ Arguments<(ins LLVM_Type:$vdata,
+ LLVM_Type:$rsrc,
+ LLVM_Type:$offset,
+ LLVM_Type:$soffset,
+ LLVM_Type:$aux)>{
+ string llvmBuilder = [{
+ auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
+ createIntrinsicCall(builder,
+ llvm::Intrinsic::amdgcn_raw_buffer_atomic_smax, {$vdata, $rsrc,
+ $offset, $soffset, $aux}, {vdataType});
+ }];
+ let hasCustomAssemblyFormat = 1;
+}
+
+//===---------------------------------------------------------------------===//
+// Buffer atomic unsigned integer min intrinsic.
+
+def ROCDL_RawBufferAtomicUMinOp :
+ ROCDL_Op<"raw.buffer.atomic.umin">,
+ Arguments<(ins LLVM_Type:$vdata,
+ LLVM_Type:$rsrc,
+ LLVM_Type:$offset,
+ LLVM_Type:$soffset,
+ LLVM_Type:$aux)>{
+ string llvmBuilder = [{
+ auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
+ createIntrinsicCall(builder,
+ llvm::Intrinsic::amdgcn_raw_buffer_atomic_umin, {$vdata, $rsrc,
+ $offset, $soffset, $aux}, {vdataType});
+ }];
+ let hasCustomAssemblyFormat = 1;
+}
+
#endif // ROCDLIR_OPS
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index e4dcb27290ee1..3a1785c0edffa 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -527,6 +527,9 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
RawBufferOpLowering<RawBufferLoadOp, ROCDL::RawBufferLoadOp>,
RawBufferOpLowering<RawBufferStoreOp, ROCDL::RawBufferStoreOp>,
RawBufferOpLowering<RawBufferAtomicFaddOp, ROCDL::RawBufferAtomicFAddOp>,
+ RawBufferOpLowering<RawBufferAtomicFmaxOp, ROCDL::RawBufferAtomicFMaxOp>,
+ RawBufferOpLowering<RawBufferAtomicSmaxOp, ROCDL::RawBufferAtomicSMaxOp>,
+ RawBufferOpLowering<RawBufferAtomicUminOp, ROCDL::RawBufferAtomicUMinOp>,
MFMAOpLowering>(converter, chipset);
}
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index a19edd513bb73..9150e7368849d 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -78,6 +78,18 @@ LogicalResult RawBufferAtomicFaddOp::verify() {
return verifyRawBufferOp(*this);
}
+LogicalResult RawBufferAtomicFmaxOp::verify() {
+ return verifyRawBufferOp(*this);
+}
+
+LogicalResult RawBufferAtomicSmaxOp::verify() {
+ return verifyRawBufferOp(*this);
+}
+
+LogicalResult RawBufferAtomicUminOp::verify() {
+ return verifyRawBufferOp(*this);
+}
+
static std::optional<uint32_t> getConstantUint32(Value v) {
APInt cst;
if (!v.getType().isInteger(32))
@@ -168,6 +180,21 @@ void RawBufferAtomicFaddOp::getCanonicalizationPatterns(
results.add<RemoveStaticallyOobBufferWrites<RawBufferAtomicFaddOp>>(context);
}
+void RawBufferAtomicFmaxOp::getCanonicalizationPatterns(
+ RewritePatternSet &results, MLIRContext *context) {
+ results.add<RemoveStaticallyOobBufferWrites<RawBufferAtomicFmaxOp>>(context);
+}
+
+void RawBufferAtomicSmaxOp::getCanonicalizationPatterns(
+ RewritePatternSet &results, MLIRContext *context) {
+ results.add<RemoveStaticallyOobBufferWrites<RawBufferAtomicSmaxOp>>(context);
+}
+
+void RawBufferAtomicUminOp::getCanonicalizationPatterns(
+ RewritePatternSet &results, MLIRContext *context) {
+ results.add<RemoveStaticallyOobBufferWrites<RawBufferAtomicUminOp>>(context);
+}
+
//===----------------------------------------------------------------------===//
// MFMAOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
index 338e71517f4cc..1a607d92ac741 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp
@@ -154,6 +154,78 @@ void RawBufferAtomicFAddOp::print(mlir::OpAsmPrinter &p) {
p << " " << getOperands() << " : " << getVdata().getType();
}
+// <operation> ::=
+// `llvm.amdgcn.raw.buffer.atomic.fmax.* %vdata, %rsrc, %offset,
+// %soffset, %aux : result_type`
+ParseResult RawBufferAtomicFMaxOp::parse(OpAsmParser &parser,
+ OperationState &result) {
+ SmallVector<OpAsmParser::UnresolvedOperand, 5> ops;
+ Type type;
+ if (parser.parseOperandList(ops, 5) || parser.parseColonType(type))
+ return failure();
+
+ auto bldr = parser.getBuilder();
+ auto int32Ty = bldr.getI32Type();
+ auto i32x4Ty = VectorType::get({4}, int32Ty);
+
+ if (parser.resolveOperands(ops, {type, i32x4Ty, int32Ty, int32Ty, int32Ty},
+ parser.getNameLoc(), result.operands))
+ return failure();
+ return success();
+}
+
+void RawBufferAtomicFMaxOp::print(mlir::OpAsmPrinter &p) {
+ p << " " << getOperands() << " : " << getVdata().getType();
+}
+
+// <operation> ::=
+// `llvm.amdgcn.raw.buffer.atomic.smax.* %vdata, %rsrc, %offset,
+// %soffset, %aux : result_type`
+ParseResult RawBufferAtomicSMaxOp::parse(OpAsmParser &parser,
+ OperationState &result) {
+ SmallVector<OpAsmParser::UnresolvedOperand, 5> ops;
+ Type type;
+ if (parser.parseOperandList(ops, 5) || parser.parseColonType(type))
+ return failure();
+
+ auto bldr = parser.getBuilder();
+ auto int32Ty = bldr.getI32Type();
+ auto i32x4Ty = VectorType::get({4}, int32Ty);
+
+ if (parser.resolveOperands(ops, {type, i32x4Ty, int32Ty, int32Ty, int32Ty},
+ parser.getNameLoc(), result.operands))
+ return failure();
+ return success();
+}
+
+void RawBufferAtomicSMaxOp::print(mlir::OpAsmPrinter &p) {
+ p << " " << getOperands() << " : " << getVdata().getType();
+}
+
+// <operation> ::=
+// `llvm.amdgcn.raw.buffer.atomic.umin.* %vdata, %rsrc, %offset,
+// %soffset, %aux : result_type`
+ParseResult RawBufferAtomicUMinOp::parse(OpAsmParser &parser,
+ OperationState &result) {
+ SmallVector<OpAsmParser::UnresolvedOperand, 5> ops;
+ Type type;
+ if (parser.parseOperandList(ops, 5) || parser.parseColonType(type))
+ return failure();
+
+ auto bldr = parser.getBuilder();
+ auto int32Ty = bldr.getI32Type();
+ auto i32x4Ty = VectorType::get({4}, int32Ty);
+
+ if (parser.resolveOperands(ops, {type, i32x4Ty, int32Ty, int32Ty, int32Ty},
+ parser.getNameLoc(), result.operands))
+ return failure();
+ return success();
+}
+
+void RawBufferAtomicUMinOp::print(mlir::OpAsmPrinter &p) {
+ p << " " << getOperands() << " : " << getVdata().getType();
+}
+
//===----------------------------------------------------------------------===//
// ROCDLDialect initialization, type parsing, and registration.
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index 9f56711e7f460..183fdb61db6cb 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -133,6 +133,39 @@ func.func @gpu_gcn_raw_buffer_atomic_fadd_f32(%value: f32, %buf: memref<64xf32>,
func.return
}
+// CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_fmax_f32
+func.func @gpu_gcn_raw_buffer_atomic_fmax_f32(%value: f32, %buf: memref<64xf32>, %idx: i32) {
+ // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
+ // CHECK: llvm.insertelement{{.*}}%[[numRecords]]
+ // CHECK: %[[word3:.*]] = llvm.mlir.constant(159744 : i32)
+ // CHECK: %[[resource:.*]] = llvm.insertelement{{.*}}%[[word3]]
+ // CHECK: rocdl.raw.buffer.atomic.fmax %{{.*}} %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : f32
+ amdgpu.raw_buffer_atomic_fmax {boundsCheck = true} %value -> %buf[%idx] : f32 -> memref<64xf32>, i32
+ func.return
+}
+
+// CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_smax_i32
+func.func @gpu_gcn_raw_buffer_atomic_smax_i32(%value: i32, %buf: memref<64xi32>, %idx: i32) {
+ // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
+ // CHECK: llvm.insertelement{{.*}}%[[numRecords]]
+ // CHECK: %[[word3:.*]] = llvm.mlir.constant(159744 : i32)
+ // CHECK: %[[resource:.*]] = llvm.insertelement{{.*}}%[[word3]]
+ // CHECK: rocdl.raw.buffer.atomic.smax %{{.*}} %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
+ amdgpu.raw_buffer_atomic_smax {boundsCheck = true} %value -> %buf[%idx] : i32 -> memref<64xi32>, i32
+ func.return
+}
+
+// CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_umin_i32
+func.func @gpu_gcn_raw_buffer_atomic_umin_i32(%value: i32, %buf: memref<64xi32>, %idx: i32) {
+ // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
+ // CHECK: llvm.insertelement{{.*}}%[[numRecords]]
+ // CHECK: %[[word3:.*]] = llvm.mlir.constant(159744 : i32)
+ // CHECK: %[[resource:.*]] = llvm.insertelement{{.*}}%[[word3]]
+ // CHECK: rocdl.raw.buffer.atomic.umin %{{.*}} %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
+ amdgpu.raw_buffer_atomic_umin {boundsCheck = true} %value -> %buf[%idx] : i32 -> memref<64xi32>, i32
+ func.return
+}
+
// CHECK-LABEL: func @lds_barrier
func.func @lds_barrier() {
// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "s_waitcnt lgkmcnt(0)\0As_barrier"
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index cbb13c008ab63..200d3e9f7d757 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -225,11 +225,11 @@ llvm.func @rocdl.mubuf(%rsrc : vector<4xi32>, %vindex : i32,
llvm.return
}
-llvm.func @rocdl.raw.buffer(%rsrc : vector<4xi32>,
+llvm.func @rocdl.raw.buffer.f32(%rsrc : vector<4xi32>,
%offset : i32, %soffset : i32,
%aux : i32, %vdata1 : f32,
%vdata2 : vector<2xf32>, %vdata4 : vector<4xf32>) {
- // CHECK-LABEL: rocdl.raw.buffer
+ // CHECK-LABEL: rocdl.raw.buffer.f32
// CHECK: %{{.*}} = rocdl.raw.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} : f32
// CHECK: %{{.*}} = rocdl.raw.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<2xf32>
// CHECK: %{{.*}} = rocdl.raw.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} : vector<4xf32>
@@ -249,10 +249,25 @@ llvm.func @rocdl.raw.buffer(%rsrc : vector<4xi32>,
rocdl.raw.buffer.store %vdata4, %rsrc, %offset, %offset, %aux : vector<4xf32>
rocdl.raw.buffer.atomic.fadd %vdata1, %rsrc, %offset, %soffset, %aux : f32
+ rocdl.raw.buffer.atomic.fmax %vdata1, %rsrc, %offset, %soffset, %aux : f32
llvm.return
}
+
+llvm.func @rocdl.raw.buffer.i32(%rsrc : vector<4xi32>,
+ %offset : i32, %soffset : i32,
+ %aux : i32, %vdata1 : i32,
+ %vdata2 : vector<2xi32>, %vdata4 : vector<4xi32>) {
+ // CHECK-LABEL: rocdl.raw.buffer.i32
+ // CHECK: rocdl.raw.buffer.atomic.smax %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : i32
+ // CHECK: rocdl.raw.buffer.atomic.umin %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : i32
+
+ rocdl.raw.buffer.atomic.smax %vdata1, %rsrc, %offset, %soffset, %aux : i32
+ rocdl.raw.buffer.atomic.umin %vdata1, %rsrc, %offset, %soffset, %aux : i32
+ llvm.return
+}
+
// -----
// expected-error @below {{attribute attached to unexpected op}}
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 49a01f66c9bc4..1a65967974e87 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -266,14 +266,30 @@ llvm.func @rocdl.raw.buffer(%rsrc : vector<4xi32>,
llvm.return
}
-llvm.func @rocdl.raw.buffer.atomic(%rsrc : vector<4xi32>,
+llvm.func @rocdl.raw.buffer.atomic.f32(%rsrc : vector<4xi32>,
%offset : i32, %soffset : i32,
%vdata1 : f32) {
%aux = llvm.mlir.constant(0 : i32) : i32
- // CHECK-LABEL: rocdl.raw.buffer.atomic
+ // CHECK-LABEL: rocdl.raw.buffer.atomic.f32
// CHECK: call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 {{.*}}
+ // CHECK: call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 {{.*}}
rocdl.raw.buffer.atomic.fadd %vdata1, %rsrc, %offset, %soffset, %aux : f32
+ rocdl.raw.buffer.atomic.fmax %vdata1, %rsrc, %offset, %soffset, %aux : f32
+
+ llvm.return
+}
+
+llvm.func @rocdl.raw.buffer.atomic.i32(%rsrc : vector<4xi32>,
+ %offset : i32, %soffset : i32,
+ %vdata1 : i32) {
+ %aux = llvm.mlir.constant(0 : i32) : i32
+ // CHECK-LABEL: rocdl.raw.buffer.atomic.i32
+ // CHECK: call i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 {{.*}}
+ // CHECK: call i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 {{.*}}
+
+ rocdl.raw.buffer.atomic.smax %vdata1, %rsrc, %offset, %soffset, %aux : i32
+ rocdl.raw.buffer.atomic.umin %vdata1, %rsrc, %offset, %soffset, %aux : i32
llvm.return
}
More information about the Mlir-commits
mailing list