[Mlir-commits] [mlir] 6c4cd7a - [mlir][sparse][gpu] refine sparse gpu round-trip and lowering test
Aart Bik
llvmlistbot at llvm.org
Thu Aug 10 17:19:08 PDT 2023
Author: Aart Bik
Date: 2023-08-10T17:18:59-07:00
New Revision: 6c4cd7a13e1ca8eef29894d269937a1bc8721bd2
URL: https://github.com/llvm/llvm-project/commit/6c4cd7a13e1ca8eef29894d269937a1bc8721bd2
DIFF: https://github.com/llvm/llvm-project/commit/6c4cd7a13e1ca8eef29894d269937a1bc8721bd2.diff
LOG: [mlir][sparse][gpu] refine sparse gpu round-trip and lowering test
Tests had become inconsistent and contained a few slip-ups
(e.g., the non-async versions did not lower).
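For context (not part of the commit), a minimal sketch of the async token
chain the updated tests rely on; the function name @destroy_async and the
buffer shapes are illustrative only. Each sparse op consumes and produces a
!gpu.async.token, which is what lets the GPU-to-LLVM lowering map the whole
sequence onto mgpu* runtime calls on a single stream, whereas the plain
(non-async) gpu.destroy_sp_mat form did not lower:

  module attributes {gpu.container_module} {
    func.func @destroy_async(%arg0: index) {
      // Start an async chain; this lowers to mgpuStreamCreate.
      %token0 = gpu.wait async
      // Device buffers reused for positions, coordinates, and values.
      %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
      %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf32>
      // Wrap the buffers in a CSR sparse-matrix handle.
      %spmat, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
      // The async form threads the token and lowers to mgpuDestroySpMat.
      %token4 = gpu.destroy_sp_mat async [%token3] %spmat
      // Synchronize and release the stream (mgpuStreamSynchronize/Destroy).
      gpu.wait [%token4]
      return
    }
  }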
Reviewed By: K-Wu
Differential Revision: https://reviews.llvm.org/D157666
Added:
Modified:
mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
mlir/test/Dialect/GPU/sparse-roundtrip.mlir
Removed:
################################################################################
diff --git a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
index 45b991bd4f8896..0c6d566321007a 100644
--- a/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-sparse-to-gpu-runtime-calls.mlir
@@ -54,6 +54,54 @@ module attributes {gpu.container_module} {
return
}
+ // CHECK-LABEL: func @spgemm
+ // CHECK: llvm.call @mgpuStreamCreate
+ // CHECK: llvm.call @mgpuMemAlloc
+ // CHECK: llvm.call @mgpuMemAlloc
+ // CHECK: llvm.call @mgpuCreateCsr
+ // CHECK: llvm.call @mgpuCreateCsr
+ // CHECK: llvm.call @mgpuCreateCsr
+ // CHECK: llvm.call @mgpuSpGEMMCreateDescr
+ // CHECK: llvm.call @mgpuSpGEMMWorkEstimation
+ // CHECK: llvm.call @mgpuSpGEMMCompute
+ // CHECK: llvm.call @mgpuSpGEMMGetSize
+ // CHECK: llvm.call @mgpuSetCsrPointers
+ // CHECK: llvm.call @mgpuSpGEMMCopy
+ // CHECK: llvm.call @mgpuSpGEMMDestroyDescr
+ // CHECK: llvm.call @mgpuDestroySpMat
+ // CHECK: llvm.call @mgpuDestroySpMat
+ // CHECK: llvm.call @mgpuDestroySpMat
+ // CHECK: llvm.call @mgpuStreamSynchronize
+ // CHECK: llvm.call @mgpuStreamDestroy
+ func.func @spgemm(%arg0: index) {
+ %token0 = gpu.wait async
+ %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
+ %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf32>
+ %spmatA, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
+ %spmatB, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
+ %spmatC, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
+ %spgemmDesc, %token6 = gpu.spgemm_create_descr async [%token5]
+ %alloc = memref.alloc() : memref<0xi8> // nullptr
+ %c0 = arith.constant 0 : index
+ %bufferSz1, %token7 = gpu.spgemm_work_estimation_or_compute async
+ [%token6]{WORK_ESTIMATION}
+ %spmatA, %spmatB, %spmatC,
+ %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
+ %bufferSz2, %token8 = gpu.spgemm_work_estimation_or_compute async
+ [%token7]{COMPUTE}
+ %spmatA, %spmatB, %spmatC,
+ %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
+ %rows, %cols, %nnz, %token9 = gpu.spgemm_get_size async [%token8] %spmatC
+ %token10 = gpu.set_csr_pointers async [%token8] %spmatC, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
+ %token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
+ %token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
+ %token13 = gpu.destroy_sp_mat async [%token12] %spmatA
+ %token14 = gpu.destroy_sp_mat async [%token13] %spmatB
+ %token15 = gpu.destroy_sp_mat async [%token14] %spmatC
+ gpu.wait [%token15]
+ return
+ }
+
// CHECK-LABEL: func @sddmm
// CHECK: llvm.call @mgpuStreamCreate
// CHECK: llvm.call @mgpuMemAlloc
@@ -80,69 +128,4 @@ module attributes {gpu.container_module} {
return
}
-
- // CHECK-LABEL: func @spgemm
- // CHECK: llvm.call @mgpuStreamCreate
- // CHECK: llvm.call @mgpuMemAlloc
- // CHECK: llvm.call @mgpuMemAlloc
- // CHECK: llvm.call @mgpuCreateCsr
- // CHECK: llvm.call @mgpuCreateCsr
- // CHECK: llvm.call @mgpuCreateCsr
- // CHECK: llvm.call @mgpuSpGEMMCreateDescr
- // CHECK: llvm.call @malloc
- // CHECK: llvm.call @mgpuSpGEMMWorkEstimation
- // CHECK: llvm.call @mgpuMemAlloc
- // CHECK: llvm.call @mgpuSpGEMMWorkEstimation
- // CHECK: llvm.call @mgpuMemAlloc
- // CHECK: llvm.call @mgpuSpGEMMCompute
- // CHECK: llvm.call @mgpuMemAlloc
- // CHECK: llvm.call @mgpuMemAlloc
- // CHECK: llvm.call @mgpuStreamSynchronize
- // CHECK: llvm.call @mgpuStreamDestroy
- // CHECK: llvm.call @mgpuStreamCreate
- // CHECK: llvm.call @mgpuSpGEMMCopy
- // CHECK: llvm.call @mgpuDestroySpMat
- // CHECK: llvm.call @mgpuDestroySpMat
- // CHECK: llvm.call @mgpuDestroySpMat
- // CHECK: llvm.call @mgpuStreamSynchronize
- // CHECK: llvm.call @mgpuStreamDestroy
- func.func @spgemm(%arg0: index) {
- %token0 = gpu.wait async
- %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
- %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
- %spmatA, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
- %spmatB, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
- %spmatC, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
- %spgemmDesc, %token6 = gpu.spgemm_create_descr async [%token5]
- // Used as nullptr
- %alloc = memref.alloc() : memref<0xi8>
- %c0 = arith.constant 0 : index
- %bufferSz1, %token7 = gpu.spgemm_work_estimation_or_compute async
- [%token6]{WORK_ESTIMATION}
- %spmatA{NON_TRANSPOSE}, %spmatB{NON_TRANSPOSE},
- %spmatC, %spgemmDesc, %c0,
- %alloc: f32 into memref<0xi8>
- %buf1, %token8 = gpu.alloc async [%token7] (%bufferSz1) : memref<?xi8>
- %bufferSz1_1, %token9 = gpu.spgemm_work_estimation_or_compute async
- [%token8]{WORK_ESTIMATION} %spmatA, %spmatB,
- %spmatC, %spgemmDesc, %bufferSz1,
- %buf1: f32 into memref<?xi8>
- %buf2, %token13 = gpu.alloc async [%token9] (%bufferSz1_1) : memref<?xi8>
- %bufferSz2_2, %token14 = gpu.spgemm_work_estimation_or_compute async
- [%token13]{COMPUTE} %spmatA, %spmatB, %spmatC,
- %spgemmDesc, %bufferSz1_1,
- %buf2: f32 into memref<?xi8>
- %rows, %cols, %nnz, %token15 = gpu.spgemm_get_size async [%token14] %spmatC
- %mem_columns, %token16 = gpu.alloc async [%token15] (%cols) : memref<?xi32>
- %mem_values, %token17 = gpu.alloc async [%token16] (%nnz) : memref<?xf32>
- gpu.wait [%token17]
- %token18 = gpu.wait async
- %token19 = gpu.spgemm_copy async [%token18] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
- %token20 = gpu.destroy_sp_mat async [%token19] %spmatA
- %token21 = gpu.destroy_sp_mat async [%token20] %spmatB
- %token22 = gpu.destroy_sp_mat async [%token21] %spmatC
- gpu.wait [%token22]
- return
- }
-
}
diff --git a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
index 171a1ad24898ff..309743d980517e 100644
--- a/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
+++ b/mlir/test/Dialect/GPU/sparse-roundtrip.mlir
@@ -54,24 +54,25 @@ module attributes {gpu.container_module} {
return
}
- // CHECK-LABEL: func @spgemm
- // CHECK: %{{.*}} = gpu.wait async
- // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xindex>
- // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf32>
- // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
- // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
- // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
- // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_create_descr async [%{{.*}}]
- // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ WORK_ESTIMATION} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
- // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ COMPUTE} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
- // CHECK: %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} = gpu.spgemm_get_size async [%{{.*}}] %{{.*}}
- // CHECK %{{.*}} = gpu.set_csr_pointers async [%{{.*}}] %{{.*}}, {{.*}}, {{.*}}, {{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
- // CHECK: %{{.*}} = gpu.spgemm_copy async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32
- // CHECK: %{{.*}} = gpu.spgemm_destroy_descr async [%{{.*}}] %{{.*}}
- // CHECK: gpu.destroy_sp_mat %{{.*}}
- // CHECK: gpu.destroy_sp_mat %{{.*}}
- // CHECK: gpu.destroy_sp_mat %{{.*}}
- // CHECK: return
+ // CHECK-LABEL: func @spgemm
+ // CHECK: %{{.*}} = gpu.wait async
+ // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xindex>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf32>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_create_descr async [%{{.*}}]
+ // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ WORK_ESTIMATION} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
+ // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ COMPUTE} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
+ // CHECK: %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} = gpu.spgemm_get_size async [%{{.*}}] %{{.*}}
+ // CHECK: %{{.*}} = gpu.set_csr_pointers async [%{{.*}}] %{{.*}}, {{.*}}, {{.*}}, {{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
+ // CHECK: %{{.*}} = gpu.spgemm_copy async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32
+ // CHECK: %{{.*}} = gpu.spgemm_destroy_descr async [%{{.*}}] %{{.*}}
+ // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
+ // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
+ // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
+ // CHECK: gpu.wait [%{{.*}}]
+ // CHECK: return
func.func @spgemm(%arg0: index) {
%token0 = gpu.wait async
%mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
@@ -94,9 +95,10 @@ module attributes {gpu.container_module} {
%token10 = gpu.set_csr_pointers async [%token8] %spmatC, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
%token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
%token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
- gpu.destroy_sp_mat %spmatA
- gpu.destroy_sp_mat %spmatB
- gpu.destroy_sp_mat %spmatC
+ %token13 = gpu.destroy_sp_mat async [%token12] %spmatA
+ %token14 = gpu.destroy_sp_mat async [%token13] %spmatB
+ %token15 = gpu.destroy_sp_mat async [%token14] %spmatC
+ gpu.wait [%token15]
return
}