[Mlir-commits] [mlir] 82ac02e - Add scalar support for amdgpu.raw_buffer_{load, store}
Krzysztof Drewniak
llvmlistbot at llvm.org
Mon Mar 20 13:19:27 PDT 2023
Author: giuseros
Date: 2023-03-20T20:19:20Z
New Revision: 82ac02e4a86070cf9924c245ff340aba1f62b45b
URL: https://github.com/llvm/llvm-project/commit/82ac02e4a86070cf9924c245ff340aba1f62b45b
DIFF: https://github.com/llvm/llvm-project/commit/82ac02e4a86070cf9924c245ff340aba1f62b45b.diff
LOG: Add scalar support for amdgpu.raw_buffer_{load,store}
Introduce the possibility to load/store scalars via amdgpu.raw_buffer_{load,store}
Reviewed By: krzysz00
Differential Revision: https://reviews.llvm.org/D146413
Added:
Modified:
mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
mlir/test/Dialect/AMDGPU/ops.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
index 92ded941aa7c0..3589fa607b72d 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
@@ -87,7 +87,7 @@ def AMDGPU_RawBufferLoadOp :
let assemblyFormat = [{
attr-dict $memref `[` $indices `]`
(`sgprOffset` $sgprOffset^)? `:`
- type($memref) `,` type($indices) `->` type($value)
+ type($memref) (`,` type($indices)^)? `->` type($value)
}];
let hasCanonicalizer = 1;
let hasVerifier = 1;
@@ -130,7 +130,7 @@ def AMDGPU_RawBufferStoreOp :
let assemblyFormat = [{
attr-dict $value `->` $memref `[` $indices `]`
(`sgprOffset` $sgprOffset^)? `:`
- type($value) `->` type($memref) `,` type($indices)
+ type($value) `->` type($memref) (`,` type($indices)^)?
}];
let hasCanonicalizer = 1;
let hasVerifier = 1;
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 749f569fa7868..d4ee6e7e5d347 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -199,7 +199,7 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
args.push_back(resource);
// Indexing (voffset)
- Value voffset;
+ Value voffset = createI32Constant(rewriter, loc, 0);
for (auto pair : llvm::enumerate(adaptor.getIndices())) {
size_t i = pair.index();
Value index = pair.value();
@@ -212,8 +212,7 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
createI32Constant(rewriter, loc, strides[i] * elementByteWidth);
}
index = rewriter.create<LLVM::MulOp>(loc, index, strideOp);
- voffset =
- voffset ? rewriter.create<LLVM::AddOp>(loc, voffset, index) : index;
+ voffset = rewriter.create<LLVM::AddOp>(loc, voffset, index);
}
if (adaptor.getIndexOffset()) {
int32_t indexOffset = *gpuOp.getIndexOffset() * elementByteWidth;
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
index 183fdb61db6cb..a26add21cefb6 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir
@@ -1,6 +1,26 @@
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx908 | FileCheck %s
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefix=RDNA
+// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_scalar_i32
+func.func @gpu_gcn_raw_buffer_load_scalar_i32(%buf: memref<i32>) -> i32 {
+ // CHECK: %[[ptr:.*]] = llvm.ptrtoint
+ // CHECK: %[[lowHalf:.*]] = llvm.trunc %[[ptr]] : i64 to i32
+ // CHECK: %[[resource_1:.*]] = llvm.insertelement %[[lowHalf]]
+ // CHECK: %[[highHalfI64:.*]] = llvm.lshr %[[ptr]]
+ // CHECK: %[[highHalfI32:.*]] = llvm.trunc %[[highHalfI64]] : i64 to i32
+ // CHECK: %[[highHalf:.*]] = llvm.and %[[highHalfI32]], %{{.*}} : i32
+ // CHECK: %[[resource_2:.*]] = llvm.insertelement %[[highHalf]], %[[resource_1]]
+ // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(4 : i32)
+ // CHECK: %[[resource_3:.*]] = llvm.insertelement %[[numRecords]], %[[resource_2]]
+ // CHECK: %[[word3:.*]] = llvm.mlir.constant(159744 : i32)
+ // RDNA: %[[word3:.*]] = llvm.mlir.constant(822243328 : i32)
+ // CHECK: %[[resource:.*]] = llvm.insertelement %[[word3]], %[[resource_3]]
+ // CHECK: %[[ret:.*]] = rocdl.raw.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
+ // CHECK: return %[[ret]]
+ %0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[] : memref<i32> -> i32
+ func.return %0 : i32
+}
+
// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_i32
func.func @gpu_gcn_raw_buffer_load_i32(%buf: memref<64xi32>, %idx: i32) -> i32 {
// CHECK: %[[ptr:.*]] = llvm.ptrtoint
@@ -94,6 +114,17 @@ func.func @gpu_gcn_raw_buffer_load_4xf8E4M3FNUZ(%buf: memref<64xf8E4M3FNUZ>, %id
}
// Since the lowering logic is shared with loads, only bitcasts need to be rechecked
+// CHECK-LABEL: func @gpu_gcn_raw_buffer_store_scalar_i32
+func.func @gpu_gcn_raw_buffer_store_scalar_i32(%value: i32, %buf: memref<i32>) {
+ // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(4 : i32)
+ // CHECK: llvm.insertelement{{.*}}%[[numRecords]]
+ // CHECK: %[[word3:.*]] = llvm.mlir.constant(159744 : i32)
+ // CHECK: %[[resource:.*]] = llvm.insertelement{{.*}}%[[word3]]
+ // CHECK: rocdl.raw.buffer.store %{{.*}} %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
+ amdgpu.raw_buffer_store {boundsCheck = true} %value -> %buf[] : i32 -> memref<i32>
+ func.return
+}
+
// CHECK-LABEL: func @gpu_gcn_raw_buffer_store_i32
func.func @gpu_gcn_raw_buffer_store_i32(%value: i32, %buf: memref<64xi32>, %idx: i32) {
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 0e13e1ecfd66c..80bd7d4655e8b 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -18,6 +18,13 @@ func.func @raw_buffer_load_f32_from_rank_4(%src : memref<128x64x32x16xf32>, %off
func.return %0 : f32
}
+// CHECK-LABEL: func @raw_buffer_load_scalar
+func.func @raw_buffer_load_scalar(%src : memref<f32>) -> f32 {
+ // CHECK: amdgpu.raw_buffer_load {indexOffset = 1 : i32} %{{.*}}[] : memref<f32> -> f32
+ %0 = amdgpu.raw_buffer_load {indexOffset = 1 : i32} %src[] : memref<f32> -> f32
+ func.return %0 : f32
+}
+
// CHECK-LABEL: func @raw_buffer_load_4xf32_from_rank_4
func.func @raw_buffer_load_4xf32_from_rank_4(%src : memref<128x64x32x16xf32>, %offset : i32, %idx0 : i32, %idx1 : i32, %idx2 : i32, %idx3 : i32) -> vector<4xf32> {
// CHECK: amdgpu.raw_buffer_load {indexOffset = 1 : i32} %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] sgprOffset %{{.*}} : memref<128x64x32x16xf32>, i32, i32, i32, i32 -> vector<4xf32>
@@ -46,6 +53,13 @@ func.func @raw_buffer_store_4xf32_to_rank_4(%value : vector<4xf32>, %dst : memre
func.return
}
+// CHECK-LABEL: func @raw_buffer_store_scalar
+func.func @raw_buffer_store_scalar(%value : f32, %dst : memref<f32>) {
+ // CHECK: amdgpu.raw_buffer_store {indexOffset = 1 : i32} %{{.*}} -> %{{.*}}[] : f32 -> memref<f32>
+ amdgpu.raw_buffer_store {indexOffset = 1 : i32} %value -> %dst[] : f32 -> memref<f32>
+ func.return
+}
+
// CHECK-LABEL: func @raw_buffer_atomic_fadd_f32_to_rank_1
func.func @raw_buffer_atomic_fadd_f32_to_rank_1(%value : f32, %dst : memref<128xf32>, %offset : i32, %idx0 : i32) {
// CHECK: amdgpu.raw_buffer_atomic_fadd {indexOffset = 1 : i32} %{{.*}} -> %{{.*}}[{{.*}}] sgprOffset %{{.*}} : f32 -> memref<128xf32>, i32
More information about the Mlir-commits
mailing list