[Mlir-commits] [mlir] 4889214 - [mlir][sparse][gpu] generate single module, unique kernel names
Aart Bik
llvmlistbot at llvm.org
Sat Apr 15 17:25:50 PDT 2023
Author: Aart Bik
Date: 2023-04-15T17:25:36-07:00
New Revision: 4889214a48cf7c7d1949b833d5a2d4604448c96e
URL: https://github.com/llvm/llvm-project/commit/4889214a48cf7c7d1949b833d5a2d4604448c96e
DIFF: https://github.com/llvm/llvm-project/commit/4889214a48cf7c7d1949b833d5a2d4604448c96e.diff
LOG: [mlir][sparse][gpu] generate single module, unique kernel names
This fixes a TODO in the first version ("only generate once, avoid name conflict"): all GPU kernels are now emitted into a single gpu.module, and each kernel receives a unique name (kernel0, kernel1, ...).
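Concretely, the first version created a fresh GPU module for every rewritten
loop, so a second kernel in the same translation unit collided on both the
module symbol and the kernel symbol. A rough before/after sketch (hand-written
for illustration; argument lists and bodies elided):

  // Before: one module per rewrite, all symbols identical.
  gpu.module @sparsekernels {
    gpu.func @kernel(...) kernel { ... }
  }
  gpu.module @sparsekernels {   // second rewrite: name conflict
    gpu.func @kernel(...) kernel { ... }
  }

  // After: a single shared module with numbered kernels.
  gpu.module @sparse_kernels {
    gpu.func @kernel0(...) kernel { ... }
    gpu.func @kernel1(...) kernel { ... }
  }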
Reviewed By: Peiming
Differential Revision: https://reviews.llvm.org/D148406
Added:
mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir
mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
index 28b5f72c19c65..96346d97ebd00 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
@@ -40,24 +40,36 @@ static void markAsGPUContainer(ModuleOp topModule) {
UnitAttr::get(topModule->getContext()));
}
-/// Constructs a new GPU module (for GPU kernels) inside the given top module.
-static gpu::GPUModuleOp genGPUModule(OpBuilder &builder, ModuleOp topModule,
- StringRef name) {
+/// Constructs a new GPU module (for GPU kernels) inside the given top module,
+/// or returns an existing GPU module if one was built previously.
+static gpu::GPUModuleOp genGPUModule(OpBuilder &builder, ModuleOp topModule) {
+ for (auto op : topModule.getBodyRegion().getOps<gpu::GPUModuleOp>())
+ return op; // existing
markAsGPUContainer(topModule);
builder.setInsertionPointToStart(&topModule.getBodyRegion().front());
- return builder.create<gpu::GPUModuleOp>(topModule->getLoc(), name);
+ return builder.create<gpu::GPUModuleOp>(topModule->getLoc(),
+ "sparse_kernels");
}
/// Constructs a new GPU kernel in the given GPU module.
static gpu::GPUFuncOp genGPUFunc(OpBuilder &builder, gpu::GPUModuleOp gpuModule,
- StringRef name, SmallVectorImpl<Value> &args) {
+ SmallVectorImpl<Value> &args) {
+ // Get a unique kernel name. Not very creative,
+ // but we simply try kernel0, kernel1, etc.
+ unsigned kernelNumber = 0;
+ SmallString<16> kernelName;
+ do {
+ kernelName.clear();
+ ("kernel" + Twine(kernelNumber++)).toStringRef(kernelName);
+ } while (gpuModule.lookupSymbol(kernelName));
+ // Then we insert a new kernel with given arguments into the module.
builder.setInsertionPointToStart(&gpuModule.getBodyRegion().front());
SmallVector<Type> argsTp;
for (unsigned i = 0, e = args.size(); i < e; i++)
argsTp.push_back(args[i].getType());
FunctionType type = FunctionType::get(gpuModule->getContext(), argsTp, {});
auto gpuFunc =
- builder.create<gpu::GPUFuncOp>(gpuModule->getLoc(), name, type);
+ builder.create<gpu::GPUFuncOp>(gpuModule->getLoc(), kernelName, type);
gpuFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
builder.getUnitAttr());
return gpuFunc;
@@ -208,12 +220,9 @@ struct ForallRewriter : public OpRewritePattern<scf::ParallelOp> {
args.push_back(genHostRegisterMemref(rewriter, loc, b));
auto saveIp = rewriter.saveInsertionPoint();
// Set up GPU module and construct GPU function.
- //
- // TODO: only generate once, avoid name conflict
- //
ModuleOp topModule = forallOp->getParentOfType<ModuleOp>();
- auto gpuModule = genGPUModule(rewriter, topModule, "sparsekernels");
- auto gpuFunc = genGPUFunc(rewriter, gpuModule, "kernel", args);
+ auto gpuModule = genGPUModule(rewriter, topModule);
+ auto gpuFunc = genGPUFunc(rewriter, gpuModule, args);
genGPUCode(rewriter, gpuFunc, forallOp, constants, scalars, buffers);
// Generate code that launches the kernel.
rewriter.restoreInsertionPoint(saveIp);
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir
new file mode 100644
index 0000000000000..ec7c30e9468a2
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_combi.mlir
@@ -0,0 +1,29 @@
+// RUN: mlir-opt %s --linalg-generalize-named-ops \
+// RUN: --pre-sparsification-rewrite \
+// RUN: --sparsification="parallelization-strategy=dense-outer-loop" \
+// RUN: --sparse-gpu-codegen | FileCheck %s
+
+#CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>
+
+//
+// CHECK-LABEL: gpu.module @sparse_kernels
+// CHECK-DAG: gpu.func @kernel0
+// CHECK-DAG: gpu.func @kernel1
+//
+// CHECK-LABEL: func.func @matmuls
+// CHECK-DAG: gpu.launch_func @sparse_kernels::@kernel0 blocks
+// CHECK-DAG: gpu.launch_func @sparse_kernels::@kernel1 blocks
+//
+func.func @matmuls(%A: tensor<1024x8xf64>,
+ %B: tensor<8x1024xf64, #CSR>,
+ %C: tensor<1024x1024xf64, #CSR>) -> tensor<1024x1024xf64> {
+ %Z = arith.constant dense<0.0> : tensor<1024x1024xf64>
+ %T = linalg.matmul
+ ins(%A, %B: tensor<1024x8xf64>, tensor<8x1024xf64, #CSR>)
+ outs(%Z: tensor<1024x1024xf64>) -> tensor<1024x1024xf64>
+ %D = linalg.matmul
+ ins(%T, %C: tensor<1024x1024xf64>, tensor<1024x1024xf64, #CSR>)
+ outs(%Z: tensor<1024x1024xf64>) -> tensor<1024x1024xf64>
+ return %D : tensor<1024x1024xf64>
+}
+
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir
index e42bbb0924ac2..92d59416b32b5 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir
@@ -8,7 +8,8 @@
//
// Compute matrix matrix C = AB
//
-// CHECK-LABEL: gpu.func @kernel(
+// CHECK-LABEL: gpu.module @sparse_kernels
+// CHECK-LABEL: gpu.func @kernel0(
// CHECK-SAME: %[[VAL_0:.*0]]: index,
// CHECK-SAME: %[[VAL_1:.*1]]: index,
// CHECK-SAME: %[[VAL_2:.*2]]: memref<?xindex>,
@@ -51,7 +52,7 @@
// CHECK: gpu.host_register
// CHECK: gpu.host_register
// CHECK: gpu.host_register
-// CHECK: gpu.launch_func @sparsekernels::@kernel blocks
+// CHECK: gpu.launch_func @sparse_kernels::@kernel0 blocks
//
func.func @matmul(%A: tensor<?x?xf64, #CSR>, %B: tensor<?x?xf64>, %C_in: tensor<?x?xf64>) -> tensor<?x?xf64> {
%C_out = linalg.matmul
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir
index 96b7f9dd31299..05dfc5829c8c6 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir
@@ -8,8 +8,8 @@
//
// Compute matrix vector y = Ax
//
-//
-// CHECK: gpu.func @kernel(
+// CHECK-LABEL: gpu.module @sparse_kernels
+// CHECK: gpu.func @kernel0(
// CHECK-SAME: %[[VAL_0:.*0]]: index,
// CHECK-SAME: %[[VAL_1:.*1]]: memref<?xf64>,
// CHECK-SAME: %[[VAL_2:.*2]]: memref<?xindex>,
@@ -48,7 +48,7 @@
// CHECK: gpu.host_register
// CHECK: gpu.host_register
// CHECK: gpu.host_register
-// CHECK: gpu.launch_func @sparsekernels::@kernel blocks
+// CHECK: gpu.launch_func @sparse_kernels::@kernel0 blocks
//
func.func @matvec(%A: tensor<?x?xf64, #CSR>, %x: tensor<?xf64>, %y_in: tensor<?xf64>) -> tensor<?xf64> {
%y_out = linalg.matvec
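For reference, the lowered IR that the new gpu_combi.mlir test checks for is
shaped roughly as follows (a sketch assembled from the CHECK lines above;
kernel bodies, argument lists, and launch dimensions are elided):

  gpu.module @sparse_kernels {
    gpu.func @kernel0(...) kernel { ... }
    gpu.func @kernel1(...) kernel { ... }
  }
  func.func @matmuls(...) -> tensor<1024x1024xf64> {
    ...
    gpu.launch_func @sparse_kernels::@kernel0 blocks in (...) threads in (...)
    ...
    gpu.launch_func @sparse_kernels::@kernel1 blocks in (...) threads in (...)
    ...
  }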