[Mlir-commits] [mlir] 289f723 - [mlir][sparse][gpu] minor code cleanup for sparse gpu ops
Aart Bik
llvmlistbot at llvm.org
Mon Aug 14 15:09:06 PDT 2023
Author: Aart Bik
Date: 2023-08-14T15:08:57-07:00
New Revision: 289f7231f99f9c3e7f3e532e3532427648b355dc
URL: https://github.com/llvm/llvm-project/commit/289f7231f99f9c3e7f3e532e3532427648b355dc
DIFF: https://github.com/llvm/llvm-project/commit/289f7231f99f9c3e7f3e532e3532427648b355dc.diff
LOG: [mlir][sparse][gpu] minor code cleanup for sparse gpu ops
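Puts the sparse GPU ops and their related methods in a consistent order.
Also renames SpGEMMGetSizeOp to SpMatGetSizeOp (`gpu.spgemm_get_size` becomes
`gpu.spmat_get_size`, and the runtime wrapper mgpuSpGEMMGetSize becomes
mgpuSpMatGetSize), since this is a general utility for sparse matrices
and is not specific to GEMM ops.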
Reviewed By: Peiming
Differential Revision: https://reviews.llvm.org/D157922
Added:
Modified:
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
mlir/test/Dialect/GPU/sparse-roundtrip.mlir
mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 5921df9fa5e8ce..ddd3e58a3628ae 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -2391,10 +2391,10 @@ def GPU_SpGEMMCopyOp : GPU_Op<"spgemm_copy", [GPU_AsyncOpInterface]> {
}];
}
-def GPU_SpGEMMGetSizeOp : GPU_Op<"spgemm_get_size", [GPU_AsyncOpInterface]> {
- let summary = "SpGEMM get size operation";
+def GPU_SpMatGetSizeOp : GPU_Op<"spmat_get_size", [GPU_AsyncOpInterface]> {
+ let summary = "SpMat get size operation";
let description = [{
- The `gpu.spgemm_get_size` operation retrieves the number of rows, number of
+ The `gpu.spmat_get_size` operation retrieves the number of rows, number of
columns, and number of non-zero elements of a sparse matrix.
If the `async` keyword is present, the op is executed asynchronously (i.e.
@@ -2404,7 +2404,7 @@ def GPU_SpGEMMGetSizeOp : GPU_Op<"spgemm_get_size", [GPU_AsyncOpInterface]> {
Example:
```mlir
- %rows, %cols, %nnz, %token = gpu.spgemm_get_size async [%dep] %spmatC
+ %rows, %cols, %nnz, %token = gpu.spmat_get_size async [%dep] %spmatC
```
}];
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index e0e9a7169bc6b9..a51fbb9b032862 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -296,6 +296,14 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern<OpTy> {
llvmVoidType,
{llvmPointerType, llvmPointerType, llvmPointerType, llvmPointerType,
llvmPointerType, llvmPointerType, llvmPointerType /*void *stream*/}};
+ FunctionCallBuilder createSpGEMMCreateDescrBuilder = {
+ "mgpuSpGEMMCreateDescr",
+ llvmPointerType,
+ {llvmPointerType /*void *stream*/}};
+ FunctionCallBuilder createSpGEMMDestroyDescrBuilder = {
+ "mgpuSpGEMMDestroyDescr",
+ llvmVoidType,
+ {llvmPointerType /*s*/, llvmPointerType /*void *stream*/}};
FunctionCallBuilder createSpGEMMWorkEstimationBuilder = {
"mgpuSpGEMMWorkEstimation",
llvmIntPtrType,
@@ -316,16 +324,8 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern<OpTy> {
{llvmPointerType /*s*/, llvmInt32Type /*ma*/, llvmInt32Type /*mb*/,
llvmPointerType /*a*/, llvmPointerType /*b*/, llvmPointerType /*c*/,
llvmInt32Type /*ctp*/, llvmPointerType /*void *stream*/}};
- FunctionCallBuilder createSpGEMMCreateDescrBuilder = {
- "mgpuSpGEMMCreateDescr",
- llvmPointerType,
- {llvmPointerType /*void *stream*/}};
- FunctionCallBuilder createSpGEMMDestroyDescrBuilder = {
- "mgpuSpGEMMDestroyDescr",
- llvmVoidType,
- {llvmPointerType /*s*/, llvmPointerType /*void *stream*/}};
- FunctionCallBuilder createSpGEMMGetSizeBuilder = {
- "mgpuSpGEMMGetSize",
+ FunctionCallBuilder createSpMatGetSizeBuilder = {
+ "mgpuSpMatGetSize",
llvmVoidType,
{llvmPointerType /*mc*/, llvmPointerType /*rc*/, llvmPointerType /*cc*/,
llvmPointerType /*nc*/, llvmPointerType /*void *stream*/}};
@@ -564,7 +564,7 @@ DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SpGEMMCreateDescrOp)
DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SpGEMMDestroyDescrOp)
DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SpGEMMWorkEstimationOrComputeOp)
DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SpGEMMCopyOp)
-DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SpGEMMGetSizeOp)
+DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SpMatGetSizeOp)
DECLARE_CONVERT_OP_TO_GPU_RUNTIME_CALL_PATTERN(SetCsrPointersOp)
} // namespace
@@ -1852,8 +1852,8 @@ LogicalResult ConvertSpGEMMCopyOpToGpuRuntimeCallPattern::matchAndRewrite(
return success();
}
-LogicalResult ConvertSpGEMMGetSizeOpToGpuRuntimeCallPattern::matchAndRewrite(
- gpu::SpGEMMGetSizeOp op, OpAdaptor adaptor,
+LogicalResult ConvertSpMatGetSizeOpToGpuRuntimeCallPattern::matchAndRewrite(
+ gpu::SpMatGetSizeOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)) ||
failed(isAsyncWithOneDependency(rewriter, op)))
@@ -1878,7 +1878,7 @@ LogicalResult ConvertSpGEMMGetSizeOpToGpuRuntimeCallPattern::matchAndRewrite(
loc, llvmInt64PointerType, llvmInt64PointerType, buffer,
ValueRange{rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
rewriter.getIndexAttr(2))});
- createSpGEMMGetSizeBuilder.create(
+ createSpMatGetSizeBuilder.create(
loc, rewriter, {adaptor.getSpmat(), rowsPtr, colsPtr, nnzsPtr, stream});
auto rows = rewriter.create<LLVM::LoadOp>(loc, llvmInt64Type, rowsPtr);
auto cols = rewriter.create<LLVM::LoadOp>(loc, llvmInt64Type, colsPtr);
@@ -1950,7 +1950,7 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
ConvertSpGEMMDestroyDescrOpToGpuRuntimeCallPattern,
ConvertSpGEMMWorkEstimationOrComputeOpToGpuRuntimeCallPattern,
ConvertSpGEMMCopyOpToGpuRuntimeCallPattern,
- ConvertSpGEMMGetSizeOpToGpuRuntimeCallPattern,
+ ConvertSpMatGetSizeOpToGpuRuntimeCallPattern,
ConvertSetCsrPointersOpToGpuRuntimeCallPattern>(converter);
patterns.add<ConvertLaunchFuncOpToGpuRuntimeCallPattern>(
converter, gpuBinaryAnnotation, kernelBarePtrCallConv, cachedModuleTable);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
index 98a61b19fc55ea..737058c543dace 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
@@ -844,7 +844,7 @@ rewriteSpGEMM(PatternRewriter &rewriter, linalg::GenericOp op, bool enableRT,
token = compute2->getResult(1);
// Get sizes.
- Operation *sizes = rewriter.create<gpu::SpGEMMGetSizeOp>(
+ Operation *sizes = rewriter.create<gpu::SpMatGetSizeOp>(
loc, indexTp, indexTp, indexTp, tokenTp, token, spMatC);
Value nnz = sizes->getResult(2);
token = sizes->getResult(3);
diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
index fd338a14c504ef..b4aa90187f572a 100644
--- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
@@ -603,6 +603,19 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSDDMM(int32_t ma, int32_t mb,
CUSPARSE_SDDMM_ALG_DEFAULT, buf))
}
+extern "C" MLIR_CUDA_WRAPPERS_EXPORT void *
+mgpuSpGEMMCreateDescr(CUstream /*stream*/) {
+ cusparseSpGEMMDescr_t spgemmDesc = nullptr;
+ CUSPARSE_REPORT_IF_ERROR(cusparseSpGEMM_createDescr(&spgemmDesc))
+ return reinterpret_cast<void *>(spgemmDesc);
+}
+
+extern "C" MLIR_CUDA_WRAPPERS_EXPORT void
+mgpuSpGEMMDestroyDescr(void *s, CUstream /*stream*/) {
+ cusparseSpGEMMDescr_t spgemmDesc = reinterpret_cast<cusparseSpGEMMDescr_t>(s);
+ CUSPARSE_REPORT_IF_ERROR(cusparseSpGEMM_destroyDescr(spgemmDesc))
+}
+
extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpGEMMWorkEstimation(
void *s, int32_t ma, int32_t mb, void *a, void *b, void *c, int32_t ctp,
intptr_t bs, void *buf, CUstream /*stream*/) {
@@ -655,21 +668,8 @@ mgpuSpGEMMCopy(void *s, int32_t ma, int32_t mb, void *a, void *b, void *c,
matC, cTp, CUSPARSE_SPGEMM_DEFAULT, spgemmDesc))
}
-extern "C" MLIR_CUDA_WRAPPERS_EXPORT void *
-mgpuSpGEMMCreateDescr(CUstream /*stream*/) {
- cusparseSpGEMMDescr_t spgemmDesc = nullptr;
- CUSPARSE_REPORT_IF_ERROR(cusparseSpGEMM_createDescr(&spgemmDesc))
- return reinterpret_cast<void *>(spgemmDesc);
-}
-
-extern "C" MLIR_CUDA_WRAPPERS_EXPORT void
-mgpuSpGEMMDestroyDescr(void *s, CUstream /*stream*/) {
- cusparseSpGEMMDescr_t spgemmDesc = reinterpret_cast<cusparseSpGEMMDescr_t>(s);
- CUSPARSE_REPORT_IF_ERROR(cusparseSpGEMM_destroyDescr(spgemmDesc))
-}
-
extern "C" MLIR_CUDA_WRAPPERS_EXPORT void
-mgpuSpGEMMGetSize(void *m, void *r, void *c, void *n, CUstream /*stream*/) {
+mgpuSpMatGetSize(void *m, void *r, void *c, void *n, CUstream /*stream*/) {
cusparseConstSpMatDescr_t matDescr =
reinterpret_cast<cusparseConstSpMatDescr_t>(m);
int64_t *rows = reinterpret_cast<int64_t *>(r);
diff --git a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
index 0c6d566321007a..d9c1c98a020830 100644
--- a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
@@ -64,7 +64,7 @@ module attributes {gpu.container_module} {
// CHECK: llvm.call @mgpuSpGEMMCreateDescr
// CHECK: llvm.call @mgpuSpGEMMWorkEstimation
// CHECK: llvm.call @mgpuSpGEMMCompute
- // CHECK: llvm.call @mgpuSpGEMMGetSize
+ // CHECK: llvm.call @mgpuSpMatGetSize
// CHECK: llvm.call @mgpuSetCsrPointers
// CHECK: llvm.call @mgpuSpGEMMCopy
// CHECK: llvm.call @mgpuSpGEMMDestroyDescr
@@ -91,7 +91,7 @@ module attributes {gpu.container_module} {
[%token7]{COMPUTE}
%spmatA, %spmatB, %spmatC,
%spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
- %rows, %cols, %nnz, %token9 = gpu.spgemm_get_size async [%token8] %spmatC
+ %rows, %cols, %nnz, %token9 = gpu.spmat_get_size async [%token8] %spmatC
%token10 = gpu.set_csr_pointers async [%token8] %spmatC, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
%token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
%token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
diff --git a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
index 309743d980517e..31273ef8c8ce61 100644
--- a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
+++ b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
@@ -64,7 +64,7 @@ module attributes {gpu.container_module} {
// CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_create_descr async [%{{.*}}]
// CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ WORK_ESTIMATION} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
// CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ COMPUTE} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
- // CHECK: %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} = gpu.spgemm_get_size async [%{{.*}}] %{{.*}}
+ // CHECK: %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} = gpu.spmat_get_size async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.set_csr_pointers async [%{{.*}}] %{{.*}}, {{.*}}, {{.*}}, {{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
// CHECK: %{{.*}} = gpu.spgemm_copy async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32
// CHECK: %{{.*}} = gpu.spgemm_destroy_descr async [%{{.*}}] %{{.*}}
@@ -91,7 +91,7 @@ module attributes {gpu.container_module} {
[%token7]{COMPUTE}
%spmatA, %spmatB, %spmatC,
%spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
- %rows, %cols, %nnz, %token9 = gpu.spgemm_get_size async [%token8] %spmatC
+ %rows, %cols, %nnz, %token9 = gpu.spmat_get_size async [%token8] %spmatC
%token10 = gpu.set_csr_pointers async [%token8] %spmatC, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
%token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
%token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir
index 00420b9a49bcd9..d880a9688077bd 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir
@@ -57,7 +57,7 @@
// CHECK: %[[VAL_65:.*]], %[[VAL_66:.*]] = gpu.spgemm_work_estimation_or_compute async {{\[}}%[[VAL_64]]]{ COMPUTE} %[[VAL_45]], %[[VAL_47]], %[[VAL_55]], %[[VAL_57]], %[[VAL_3]], %[[VAL_53]] : f32 into memref<?xf32>
// CHECK: %[[VAL_67:.*]], %[[VAL_68:.*]] = gpu.alloc async {{\[}}%[[VAL_66]]] (%[[VAL_65]]) : memref<?xi8>
// CHECK: %[[VAL_69:.*]], %[[VAL_70:.*]] = gpu.spgemm_work_estimation_or_compute async {{\[}}%[[VAL_68]]]{ COMPUTE} %[[VAL_45]], %[[VAL_47]], %[[VAL_55]], %[[VAL_57]], %[[VAL_65]], %[[VAL_67]] : f32 into memref<?xi8>
-// CHECK: %[[VAL_71:.*]], %[[VAL_72:.*]], %[[VAL_73:.*]], %[[VAL_74:.*]] = gpu.spgemm_get_size async {{\[}}%[[VAL_70]]] %[[VAL_55]]
+// CHECK: %[[VAL_71:.*]], %[[VAL_72:.*]], %[[VAL_73:.*]], %[[VAL_74:.*]] = gpu.spmat_get_size async {{\[}}%[[VAL_70]]] %[[VAL_55]]
// CHECK: %[[VAL_75:.*]], %[[VAL_76:.*]] = gpu.alloc async {{\[}}%[[VAL_74]]] (%[[VAL_73]]) : memref<?xindex>
// CHECK: %[[VAL_77:.*]], %[[VAL_78:.*]] = gpu.alloc async {{\[}}%[[VAL_76]]] (%[[VAL_73]]) : memref<?xf32>
// CHECK: %[[VAL_79:.*]] = gpu.set_csr_pointers async {{\[}}%[[VAL_78]]] %[[VAL_55]], %[[VAL_49]], %[[VAL_75]], %[[VAL_77]] : memref<?xindex>, memref<?xindex>, memref<?xf32>
More information about the Mlir-commits mailing list