[Mlir-commits] [mlir] [mlir][sparse] add parallelization options to mini pipeline (PR #104233)
Yinying Li
llvmlistbot at llvm.org
Fri Aug 30 00:11:11 PDT 2024
https://github.com/yinying-lisa-li updated https://github.com/llvm/llvm-project/pull/104233
From 9c5056acd7f22bce0fdab4c2546c4569cd998d0e Mon Sep 17 00:00:00 2001
From: Yinying Li <yinyingli at google.com>
Date: Wed, 14 Aug 2024 20:46:03 +0000
Subject: [PATCH 1/5] add parallelization options
---
.../Dialect/SparseTensor/Transforms/Passes.td | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
index 8ec18a1e186481..71617ee8a63d0d 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
@@ -487,6 +487,23 @@ def SparsificationAndBufferization : Pass<"sparsification-and-bufferization", "M
"Enable i32 indexing into vectors (for efficient gather/scatter)">,
Option<"enableGPULibgen", "enable-gpu-libgen", "bool", "false",
"Enable GPU acceleration by means of direct library calls">,
+ Option<"parallelization", "parallelization-strategy", "mlir::SparseParallelizationStrategy",
+ "mlir::SparseParallelizationStrategy::kNone",
+ "Set the parallelization strategy", [{llvm::cl::values(
+ clEnumValN(mlir::SparseParallelizationStrategy::kNone, "none",
+ "Turn off sparse parallelization."),
+ clEnumValN(mlir::SparseParallelizationStrategy::kDenseOuterLoop,
+ "dense-outer-loop",
+ "Enable dense outer loop sparse parallelization."),
+ clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageOuterLoop,
+ "any-storage-outer-loop",
+ "Enable sparse parallelization regardless of storage for the outer loop."),
+ clEnumValN(mlir::SparseParallelizationStrategy::kDenseAnyLoop,
+ "dense-any-loop",
+ "Enable dense parallelization for any loop."),
+ clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
+ "any-storage-any-loop",
+ "Enable sparse parallelization for any storage and loop."))}]>,
Option<"sparseEmitStrategy", "sparse-emit-strategy", "mlir::SparseEmitStrategy",
"mlir::SparseEmitStrategy::kFunctional",
"Emit functional code or interfaces (to debug) for sparse loops", [{llvm::cl::values(
From 277b8de64d2e1a4bd9e3516b1ca2202af9d2a61c Mon Sep 17 00:00:00 2001
From: Yinying Li <yinyingli at google.com>
Date: Wed, 14 Aug 2024 23:57:17 +0000
Subject: [PATCH 2/5] Add parallelization strategy to sparsification and
 bufferization pass.
---
.../Dialect/SparseTensor/Transforms/Passes.h | 3 +-
.../Pipelines/SparseTensorPipelines.cpp | 3 +-
.../SparsificationAndBufferizationPass.cpp | 13 ++++++--
.../SparseTensor/minipipeline_vector.mlir | 30 +++++++++++++++++++
4 files changed, 44 insertions(+), 5 deletions(-)
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
index 8413691910189a..d22df6a7857c1d 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
@@ -263,7 +263,8 @@ std::unique_ptr<Pass> createSparsificationAndBufferizationPass(
bool createSparseDeallocs, bool enableRuntimeLibrary,
bool enableBufferInitialization, unsigned vectorLength,
bool enableVLAVectorization, bool enableSIMDIndex32, bool enableGPULibgen,
- SparseEmitStrategy emitStrategy);
+ SparseEmitStrategy emitStrategy,
+ SparseParallelizationStrategy parallelizationStrategy);
//===----------------------------------------------------------------------===//
// Sparse Iteration Transform Passes
diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
index c5eb965884396a..606cfa7094f32c 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -45,7 +45,8 @@ void mlir::sparse_tensor::buildSparsifier(OpPassManager &pm,
/*enableVLAVectorization=*/options.armSVE,
/*enableSIMDIndex32=*/options.force32BitVectorIndices,
options.enableGPULibgen,
- options.sparsificationOptions().sparseEmitStrategy));
+ options.sparsificationOptions().sparseEmitStrategy,
+ options.sparsificationOptions().parallelizationStrategy));
// Bail-early for test setup.
if (options.testBufferizationAnalysisOnly)
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
index e088328848c9c8..6e882a8d0ff30a 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
@@ -78,7 +78,8 @@ class SparsificationAndBufferizationPass
const SparsificationOptions &sparsificationOptions,
bool createSparseDeallocs, bool enableRuntimeLibrary,
bool enableBufferInitialization, unsigned vl, bool vla, bool index32,
- bool gpu, SparseEmitStrategy emitStrategy)
+ bool gpu, SparseEmitStrategy emitStrategy,
+ SparseParallelizationStrategy parallelizationStrategy)
: bufferizationOptions(bufferizationOptions),
sparsificationOptions(sparsificationOptions),
createSparseDeallocs(createSparseDeallocs),
@@ -90,6 +91,7 @@ class SparsificationAndBufferizationPass
enableSIMDIndex32 = index32;
enableGPULibgen = gpu;
sparseEmitStrategy = emitStrategy;
+ parallelization = parallelizationStrategy;
}
/// Bufferize all dense ops. This assumes that no further analysis is needed
@@ -124,6 +126,9 @@ class SparsificationAndBufferizationPass
// Overrides the default emit strategy using user-provided value.
this->sparsificationOptions.sparseEmitStrategy = sparseEmitStrategy;
+ // Overrides the default parallelization strategy using user-provided value.
+ this->sparsificationOptions.parallelizationStrategy = parallelization;
+
// Run enabling transformations.
{
OpPassManager pm("builtin.module");
@@ -248,10 +253,12 @@ std::unique_ptr<mlir::Pass> mlir::createSparsificationAndBufferizationPass(
bool createSparseDeallocs, bool enableRuntimeLibrary,
bool enableBufferInitialization, unsigned vectorLength,
bool enableVLAVectorization, bool enableSIMDIndex32, bool enableGPULibgen,
- SparseEmitStrategy emitStrategy) {
+ SparseEmitStrategy emitStrategy,
+ SparseParallelizationStrategy parallelizationStrategy) {
return std::make_unique<
mlir::sparse_tensor::SparsificationAndBufferizationPass>(
bufferizationOptions, sparsificationOptions, createSparseDeallocs,
enableRuntimeLibrary, enableBufferInitialization, vectorLength,
- enableVLAVectorization, enableSIMDIndex32, enableGPULibgen, emitStrategy);
+ enableVLAVectorization, enableSIMDIndex32, enableGPULibgen, emitStrategy,
+ parallelizationStrategy);
}
diff --git a/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir b/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
index 2475aa5139da48..0f5f3029e3b0ef 100755
--- a/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
+++ b/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
@@ -1,10 +1,14 @@
// RUN: mlir-opt %s --sparsification-and-bufferization | FileCheck %s --check-prefix=CHECK-NOVEC
// RUN: mlir-opt %s --sparsification-and-bufferization="vl=8" | FileCheck %s --check-prefix=CHECK-VEC
+// RUN: mlir-opt %s --sparsification-and-bufferization="parallelization-strategy=any-storage-any-loop" | FileCheck %s --check-prefix=CHECK-PARA
// Test to ensure we can pass optimization flags into
// the mini sparsification and bufferization pipeline.
#SV = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
+#SparseMatrix = #sparse_tensor.encoding<{
+ map = (d0, d1) -> (d0 : compressed, d1 : compressed)
+}>
#trait_sum_reduction = {
indexing_maps = [
@@ -15,6 +19,32 @@
doc = "x += SUM_i a(i)"
}
+#trait_ss = {
+ indexing_maps = [
+ affine_map<(i,j) -> (i,j)>, // A
+ affine_map<(i,j) -> (i,j)> // X (out)
+ ],
+ iterator_types = ["parallel", "parallel"],
+ doc = "X(i,j) = A(i,j) * SCALE"
+}
+
+//
+// CHECK-PARA-LABEL: func.func @scale_ss
+// CHECK-PARA: scf.parallel
+//
+func.func @scale_ss(%scale: f32,
+ %arga: tensor<?x?xf32, #SparseMatrix>,
+ %argx: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = linalg.generic #trait_ss
+ ins(%arga: tensor<?x?xf32, #SparseMatrix>)
+ outs(%argx: tensor<?x?xf32>) {
+ ^bb(%a: f32, %x: f32):
+ %0 = arith.mulf %a, %scale : f32
+ linalg.yield %0 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+
//
// CHECK-NOVEC-LABEL: func.func @sum_reduction
// CHECK-NOVEC: scf.for
From 305fddba52e809eeda03b1f4b96e27530015db50 Mon Sep 17 00:00:00 2001
From: Yinying Li <yinyingli at google.com>
Date: Fri, 30 Aug 2024 06:54:56 +0000
Subject: [PATCH 3/5] address comments
---
.../Dialect/SparseTensor/Transforms/Passes.td | 18 ++++-----
.../SparseTensor/minipipeline_parallel.mlir | 38 +++++++++++++++++++
2 files changed, 47 insertions(+), 9 deletions(-)
create mode 100644 mlir/test/Dialect/SparseTensor/minipipeline_parallel.mlir
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
index 71617ee8a63d0d..f55d61a4fef6a7 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
@@ -154,6 +154,15 @@ def SparsificationPass : Pass<"sparsification", "ModuleOp"> {
];
// TODO(57514): These enum options are duplicated in Passes.h.
let options = [
+ Option<"sparseEmitStrategy", "sparse-emit-strategy", "mlir::SparseEmitStrategy",
+ "mlir::SparseEmitStrategy::kFunctional",
+ "Emit functional code or interfaces (to debug) for sparse loops", [{llvm::cl::values(
+ clEnumValN(mlir::SparseEmitStrategy::kFunctional, "functional",
+ "Emit functional code (with scf.for/while)."),
+ clEnumValN(mlir::SparseEmitStrategy::kSparseIterator, "sparse-iterator",
+ "Emit (experimental) loops (with sparse.iterate)."),
+ clEnumValN(mlir::SparseEmitStrategy::kDebugInterface, "debug-interface",
+ "Emit non-functional but easy-to-read interfaces to debug."))}]>,
Option<"parallelization", "parallelization-strategy", "mlir::SparseParallelizationStrategy",
"mlir::SparseParallelizationStrategy::kNone",
"Set the parallelization strategy", [{llvm::cl::values(
@@ -171,15 +180,6 @@ def SparsificationPass : Pass<"sparsification", "ModuleOp"> {
clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
"any-storage-any-loop",
"Enable sparse parallelization for any storage and loop."))}]>,
- Option<"sparseEmitStrategy", "sparse-emit-strategy", "mlir::SparseEmitStrategy",
- "mlir::SparseEmitStrategy::kFunctional",
- "Emit functional code or interfaces (to debug) for sparse loops", [{llvm::cl::values(
- clEnumValN(mlir::SparseEmitStrategy::kFunctional, "functional",
- "Emit functional code (with scf.for/while)."),
- clEnumValN(mlir::SparseEmitStrategy::kSparseIterator, "sparse-iterator",
- "Emit (experimental) loops (with sparse.iterate)."),
- clEnumValN(mlir::SparseEmitStrategy::kDebugInterface, "debug-interface",
- "Emit non-functional but easy-to-read interfaces to debug."))}]>,
Option<"enableRuntimeLibrary", "enable-runtime-library", "bool",
"true", "Enable runtime library for manipulating sparse tensors">,
];
diff --git a/mlir/test/Dialect/SparseTensor/minipipeline_parallel.mlir b/mlir/test/Dialect/SparseTensor/minipipeline_parallel.mlir
new file mode 100644
index 00000000000000..d97d6e58a3df2d
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/minipipeline_parallel.mlir
@@ -0,0 +1,38 @@
+// RUN: mlir-opt %s --sparsification-and-bufferization | FileCheck %s --check-prefix=CHECK-NOPARA
+// RUN: mlir-opt %s --sparsification-and-bufferization="parallelization-strategy=any-storage-any-loop" | FileCheck %s --check-prefix=CHECK-PARA
+
+// Test to ensure we can pass parallelization flags into
+// the mini sparsification and bufferization pipeline.
+
+#SparseMatrix = #sparse_tensor.encoding<{
+ map = (d0, d1) -> (d0 : compressed, d1 : compressed)
+}>
+
+#trait_ss = {
+ indexing_maps = [
+ affine_map<(i,j) -> (i,j)>, // A
+ affine_map<(i,j) -> (i,j)> // X (out)
+ ],
+ iterator_types = ["parallel", "parallel"],
+ doc = "X(i,j) = A(i,j) * SCALE"
+}
+
+//
+// CHECK-NOPARA-LABEL: func.func @scale_ss
+// CHECK-NOPARA: scf.for
+//
+// CHECK-PARA-LABEL: func.func @scale_ss
+// CHECK-PARA: scf.parallel
+//
+func.func @scale_ss(%scale: f32,
+ %arga: tensor<?x?xf32, #SparseMatrix>,
+ %argx: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = linalg.generic #trait_ss
+ ins(%arga: tensor<?x?xf32, #SparseMatrix>)
+ outs(%argx: tensor<?x?xf32>) {
+ ^bb(%a: f32, %x: f32):
+ %0 = arith.mulf %a, %scale : f32
+ linalg.yield %0 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
From b5cbb82a5ded766c12bb64e144f34657d4f2e3b8 Mon Sep 17 00:00:00 2001
From: Yinying Li <yinyingli at google.com>
Date: Fri, 30 Aug 2024 03:01:23 -0400
Subject: [PATCH 4/5] Update minipipeline_vector.mlir
---
.../SparseTensor/minipipeline_vector.mlir | 30 -------------------
1 file changed, 30 deletions(-)
diff --git a/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir b/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
index 0f5f3029e3b0ef..2475aa5139da48 100755
--- a/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
+++ b/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
@@ -1,14 +1,10 @@
// RUN: mlir-opt %s --sparsification-and-bufferization | FileCheck %s --check-prefix=CHECK-NOVEC
// RUN: mlir-opt %s --sparsification-and-bufferization="vl=8" | FileCheck %s --check-prefix=CHECK-VEC
-// RUN: mlir-opt %s --sparsification-and-bufferization="parallelization-strategy=any-storage-any-loop" | FileCheck %s --check-prefix=CHECK-PARA
// Test to ensure we can pass optimization flags into
// the mini sparsification and bufferization pipeline.
#SV = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>
-#SparseMatrix = #sparse_tensor.encoding<{
- map = (d0, d1) -> (d0 : compressed, d1 : compressed)
-}>
#trait_sum_reduction = {
indexing_maps = [
@@ -19,32 +15,6 @@
doc = "x += SUM_i a(i)"
}
-#trait_ss = {
- indexing_maps = [
- affine_map<(i,j) -> (i,j)>, // A
- affine_map<(i,j) -> (i,j)> // X (out)
- ],
- iterator_types = ["parallel", "parallel"],
- doc = "X(i,j) = A(i,j) * SCALE"
-}
-
-//
-// CHECK-PARA-LABEL: func.func @scale_ss
-// CHECK-PARA: scf.parallel
-//
-func.func @scale_ss(%scale: f32,
- %arga: tensor<?x?xf32, #SparseMatrix>,
- %argx: tensor<?x?xf32>) -> tensor<?x?xf32> {
- %0 = linalg.generic #trait_ss
- ins(%arga: tensor<?x?xf32, #SparseMatrix>)
- outs(%argx: tensor<?x?xf32>) {
- ^bb(%a: f32, %x: f32):
- %0 = arith.mulf %a, %scale : f32
- linalg.yield %0 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-
//
// CHECK-NOVEC-LABEL: func.func @sum_reduction
// CHECK-NOVEC: scf.for
From fbfda457ed7da835d945b2e4bd11bebba5487b95 Mon Sep 17 00:00:00 2001
From: Yinying Li <yinyingli at google.com>
Date: Fri, 30 Aug 2024 03:11:01 -0400
Subject: [PATCH 5/5] Update Passes.td
---
.../Dialect/SparseTensor/Transforms/Passes.td | 36 +++++++++----------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
index f55d61a4fef6a7..a534381bd5c2f3 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
@@ -154,15 +154,6 @@ def SparsificationPass : Pass<"sparsification", "ModuleOp"> {
];
// TODO(57514): These enum options are duplicated in Passes.h.
let options = [
- Option<"sparseEmitStrategy", "sparse-emit-strategy", "mlir::SparseEmitStrategy",
- "mlir::SparseEmitStrategy::kFunctional",
- "Emit functional code or interfaces (to debug) for sparse loops", [{llvm::cl::values(
- clEnumValN(mlir::SparseEmitStrategy::kFunctional, "functional",
- "Emit functional code (with scf.for/while)."),
- clEnumValN(mlir::SparseEmitStrategy::kSparseIterator, "sparse-iterator",
- "Emit (experimental) loops (with sparse.iterate)."),
- clEnumValN(mlir::SparseEmitStrategy::kDebugInterface, "debug-interface",
- "Emit non-functional but easy-to-read interfaces to debug."))}]>,
Option<"parallelization", "parallelization-strategy", "mlir::SparseParallelizationStrategy",
"mlir::SparseParallelizationStrategy::kNone",
"Set the parallelization strategy", [{llvm::cl::values(
@@ -180,6 +171,15 @@ def SparsificationPass : Pass<"sparsification", "ModuleOp"> {
clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
"any-storage-any-loop",
"Enable sparse parallelization for any storage and loop."))}]>,
+ Option<"sparseEmitStrategy", "sparse-emit-strategy", "mlir::SparseEmitStrategy",
+ "mlir::SparseEmitStrategy::kFunctional",
+ "Emit functional code or interfaces (to debug) for sparse loops", [{llvm::cl::values(
+ clEnumValN(mlir::SparseEmitStrategy::kFunctional, "functional",
+ "Emit functional code (with scf.for/while)."),
+ clEnumValN(mlir::SparseEmitStrategy::kSparseIterator, "sparse-iterator",
+ "Emit (experimental) loops (with sparse.iterate)."),
+ clEnumValN(mlir::SparseEmitStrategy::kDebugInterface, "debug-interface",
+ "Emit non-functional but easy-to-read interfaces to debug."))}]>,
Option<"enableRuntimeLibrary", "enable-runtime-library", "bool",
"true", "Enable runtime library for manipulating sparse tensors">,
];
@@ -487,6 +487,15 @@ def SparsificationAndBufferization : Pass<"sparsification-and-bufferization", "M
"Enable i32 indexing into vectors (for efficient gather/scatter)">,
Option<"enableGPULibgen", "enable-gpu-libgen", "bool", "false",
"Enable GPU acceleration by means of direct library calls">,
+ Option<"sparseEmitStrategy", "sparse-emit-strategy", "mlir::SparseEmitStrategy",
+ "mlir::SparseEmitStrategy::kFunctional",
+ "Emit functional code or interfaces (to debug) for sparse loops", [{llvm::cl::values(
+ clEnumValN(mlir::SparseEmitStrategy::kFunctional, "functional",
+ "Emit functional code (with scf.for/while)."),
+ clEnumValN(mlir::SparseEmitStrategy::kSparseIterator, "sparse-iterator",
+ "Emit (experimental) loops (with sparse.iterate)."),
+ clEnumValN(mlir::SparseEmitStrategy::kDebugInterface, "debug-interface",
+ "Emit non-functional but easy-to-read interfaces to debug."))}]>,
Option<"parallelization", "parallelization-strategy", "mlir::SparseParallelizationStrategy",
"mlir::SparseParallelizationStrategy::kNone",
"Set the parallelization strategy", [{llvm::cl::values(
@@ -504,15 +513,6 @@ def SparsificationAndBufferization : Pass<"sparsification-and-bufferization", "M
clEnumValN(mlir::SparseParallelizationStrategy::kAnyStorageAnyLoop,
"any-storage-any-loop",
"Enable sparse parallelization for any storage and loop."))}]>,
- Option<"sparseEmitStrategy", "sparse-emit-strategy", "mlir::SparseEmitStrategy",
- "mlir::SparseEmitStrategy::kFunctional",
- "Emit functional code or interfaces (to debug) for sparse loops", [{llvm::cl::values(
- clEnumValN(mlir::SparseEmitStrategy::kFunctional, "functional",
- "Emit functional code (with scf.for/while)."),
- clEnumValN(mlir::SparseEmitStrategy::kSparseIterator, "sparse-iterator",
- "Emit (experimental) loops (with sparse.iterate)."),
- clEnumValN(mlir::SparseEmitStrategy::kDebugInterface, "debug-interface",
- "Emit non-functional but easy-to-read interfaces to debug."))}]>,
];
}
More information about the Mlir-commits
mailing list