[Mlir-commits] [mlir] 849f016 - [mlir][sparse] accept affine subscripts in outer dimensions of dense memrefs
Aart Bik
llvmlistbot at llvm.org
Mon Oct 11 11:45:20 PDT 2021
Author: Aart Bik
Date: 2021-10-11T11:45:14-07:00
New Revision: 849f016ce8322d492e7d85eaf0c2d322e8836a64
URL: https://github.com/llvm/llvm-project/commit/849f016ce8322d492e7d85eaf0c2d322e8836a64
DIFF: https://github.com/llvm/llvm-project/commit/849f016ce8322d492e7d85eaf0c2d322e8836a64.diff
LOG: [mlir][sparse] accept affine subscripts in outer dimensions of dense memrefs
This relaxes vectorization of dense memrefs a bit, so that affine expressions
are now allowed in the outer dimensions. Vectorization of non-unit-stride
references remains disabled, though, since it appears ineffective anyway.
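For illustration, a kernel with the following trait is now accepted for
vectorization, since the affine subscript i+1 occurs only in the outer
dimension of the dense output (this mirrors the add_dense test added below):

  #trait_affine = {
    indexing_maps = [
      affine_map<(i,j) -> (i,j)>,   // A (sparse input)
      affine_map<(i,j) -> (i+1,j)>  // X (dense output, affine outer subscript)
    ],
    iterator_types = ["parallel","parallel"],
    doc = "X(i+1,j) += A(i,j)"
  }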
Reviewed By: bixia
Differential Revision: https://reviews.llvm.org/D111469
Added:
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
mlir/test/Dialect/SparseTensor/sparse_vector.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index 373f3d1391a20..bcebf0721c6cc 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -929,22 +929,23 @@ static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isReduction,
llvm_unreachable("unexpected parallelization strategy");
}
-/// Checks unit strides for dense tensors. The iteration graph may have ignored
+/// Checks unit stride for dense tensors. The iteration graph may have ignored
/// dense access patterns in order to avoid cycles (sparse access patterns are
/// always placed innermost), but that means dense access has become strided.
-/// For now, we reject vectorization of such cases.
-/// TODO: implement strided load/stores on dense arrays
+/// This prevents effective vectorization.
static bool denseUnitStrides(Merger &merger, linalg::GenericOp op,
- unsigned ldx) {
+ unsigned idx) {
for (OpOperand *t : op.getInputAndOutputOperands()) {
if (!getSparseTensorEncoding(t->get().getType())) {
auto map = op.getTiedIndexingMap(t);
for (unsigned d = 0, rank = map.getNumResults(); d < rank; d++) {
AffineExpr a = map.getResult(d);
- if (a.getKind() != AffineExprKind::DimId)
- return false; // very conservative
- unsigned idx = a.cast<AffineDimExpr>().getPosition();
- if (idx == ldx && d != rank - 1)
+ // Report non-unit stride if innermost index appears at an outer
+ // dimension (true non-unit stride) or if the innermost index appears
+ // in a compound subscript in the innermost dimension. Even if the
+ // latter is unit stride, it does not play well with scatter/gather.
+ if (a.isFunctionOfDim(idx) &&
+ ((d != rank - 1) || (a.getKind() != AffineExprKind::DimId)))
return false;
}
}
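In other words, with idx denoting the innermost loop index, the updated check
rejects only two situations. The following indexing maps are an illustrative
sketch of the three cases (they are not taken from the patch itself):

  affine_map<(i,j) -> (j,i)>    // rejected: innermost index j appears at an
                                //   outer dimension, a true non-unit stride
  affine_map<(i,j) -> (i,j+1)>  // rejected: compound subscript of j in the
                                //   innermost dimension; unit stride, but it
                                //   does not play well with scatter/gather
  affine_map<(i,j) -> (i+1,j)>  // accepted: affine expression confined to an
                                //   outer dimension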
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
index 2b2492fc1db1e..46af303b6662c 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
@@ -1,10 +1,10 @@
-// RUN: mlir-opt %s -sparsification="vectorization-strategy=0 vl=16" -split-input-file | \
+// RUN: mlir-opt %s -sparsification="vectorization-strategy=0 vl=16" -cse -split-input-file | \
// RUN: FileCheck %s --check-prefix=CHECK-VEC0
-// RUN: mlir-opt %s -sparsification="vectorization-strategy=1 vl=16" -split-input-file | \
+// RUN: mlir-opt %s -sparsification="vectorization-strategy=1 vl=16" -cse -split-input-file | \
// RUN: FileCheck %s --check-prefix=CHECK-VEC1
-// RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=16" -split-input-file | \
+// RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=16" -cse -split-input-file | \
// RUN: FileCheck %s --check-prefix=CHECK-VEC2
-// RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=16 enable-simd-index32=true" -split-input-file | \
+// RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=16 enable-simd-index32=true" -cse -split-input-file | \
// RUN: FileCheck %s --check-prefix=CHECK-VEC3
#DenseVector = #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>
@@ -386,3 +386,87 @@ func @mul_ds(%arga: tensor<512x1024xf32, #SparseMatrix>, %argb: tensor<512x1024x
} -> tensor<512x1024xf32>
return %0 : tensor<512x1024xf32>
}
+
+// -----
+
+#SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["dense","compressed"]}>
+
+#trait_affine = {
+ indexing_maps = [
+ affine_map<(i,j) -> (i,j)>,
+ affine_map<(i,j) -> (i+1,j)>
+ ],
+ iterator_types = ["parallel","parallel"],
+ doc = "X(i+1,j) += A(i,j)"
+}
+
+//
+// CHECK-VEC0-LABEL: func @add_dense
+// CHECK-VEC0-DAG: %[[c0:.*]] = constant 0 : index
+// CHECK-VEC0-DAG: %[[c1:.*]] = constant 1 : index
+// CHECK-VEC0-DAG: %[[c32:.*]] = constant 32 : index
+// CHECK-VEC0: scf.for %[[i:.*]] = %[[c0]] to %[[c32]] step %[[c1]] {
+// CHECK-VEC0: %[[lo:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xindex>
+// CHECK-VEC0: %[[i1:.*]] = addi %[[i]], %[[c1]] : index
+// CHECK-VEC0: %[[hi:.*]] = memref.load %{{.*}}[%[[i1]]] : memref<?xindex>
+// CHECK-VEC0: scf.for %[[jj:.*]] = %[[lo]] to %[[hi]] step %[[c1]] {
+// CHECK-VEC0: %[[j:.*]] = memref.load %{{.*}}[%[[jj]]] : memref<?xindex>
+// CHECK-VEC0: %[[x:.*]] = memref.load %{{.*}}[%[[i1]], %[[j]]] : memref<33x64xf64>
+// CHECK-VEC0: %[[a:.*]] = memref.load %{{.*}}[%[[jj]]] : memref<?xf64>
+// CHECK-VEC0: %[[s:.*]] = addf %[[x]], %[[a]] : f64
+// CHECK-VEC0: memref.store %[[s]], %{{.*}}[%[[i1]], %[[j]]] : memref<33x64xf64>
+// CHECK-VEC0: }
+// CHECK-VEC0: }
+// CHECK-VEC0: return
+//
+// CHECK-VEC1-LABEL: func @add_dense
+// CHECK-VEC1-DAG: %[[c0:.*]] = constant 0 : index
+// CHECK-VEC1-DAG: %[[c1:.*]] = constant 1 : index
+// CHECK-VEC1-DAG: %[[c32:.*]] = constant 32 : index
+// CHECK-VEC1: scf.for %[[i:.*]] = %[[c0]] to %[[c32]] step %[[c1]] {
+// CHECK-VEC1: %[[lo:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xindex>
+// CHECK-VEC1: %[[i1:.*]] = addi %[[i]], %[[c1]] : index
+// CHECK-VEC1: %[[hi:.*]] = memref.load %{{.*}}[%[[i1]]] : memref<?xindex>
+// CHECK-VEC1: scf.for %[[jj:.*]] = %[[lo]] to %[[hi]] step %[[c1]] {
+// CHECK-VEC1: %[[j:.*]] = memref.load %{{.*}}[%[[jj]]] : memref<?xindex>
+// CHECK-VEC1: %[[x:.*]] = memref.load %{{.*}}[%[[i1]], %[[j]]] : memref<33x64xf64>
+// CHECK-VEC1: %[[a:.*]] = memref.load %{{.*}}[%[[jj]]] : memref<?xf64>
+// CHECK-VEC1: %[[s:.*]] = addf %[[x]], %[[a]] : f64
+// CHECK-VEC1: memref.store %[[s]], %{{.*}}[%[[i1]], %[[j]]] : memref<33x64xf64>
+// CHECK-VEC1: }
+// CHECK-VEC1: }
+// CHECK-VEC1: return
+//
+// CHECK-VEC2: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (16, d0 - d1)
+// CHECK-VEC2-LABEL: func @add_dense
+// CHECK-VEC2-DAG: %[[c0:.*]] = constant 0 : index
+// CHECK-VEC2-DAG: %[[c1:.*]] = constant 1 : index
+// CHECK-VEC2-DAG: %[[c16:.*]] = constant 16 : index
+// CHECK-VEC2-DAG: %[[c32:.*]] = constant 32 : index
+// CHECK-VEC2: scf.for %[[i:.*]] = %[[c0]] to %[[c32]] step %[[c1]] {
+// CHECK-VEC2: %[[lo:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xindex>
+// CHECK-VEC2: %[[i1:.*]] = addi %[[i]], %[[c1]] : index
+// CHECK-VEC2: %[[hi:.*]] = memref.load %{{.*}}[%[[i1]]] : memref<?xindex>
+// CHECK-VEC2: scf.for %[[jj:.*]] = %[[lo]] to %[[hi]] step %[[c16]] {
+// CHECK-VEC2: %[[sub:.*]] = affine.min #[[$map]](%[[hi]], %[[jj]])[%[[c16]]]
+// CHECK-VEC2: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<16xi1>
+// CHECK-VEC2: %[[j:.*]] = vector.maskedload %{{.*}}[%[[jj]]], %[[mask]], %{{.*}} : memref<?xindex>
+// CHECK-VEC2: %[[x:.*]] = vector.gather %{{.*}}[%[[i1]], %[[c0]]] [%[[j]]], %[[mask]], %{{.*}} : memref<33x64xf64>
+// CHECK-VEC2: %[[a:.*]] = vector.maskedload %{{.*}}[%[[jj]]], %[[mask]], %{{.*}} : memref<?xf64>
+// CHECK-VEC2: %[[s:.*]] = addf %[[x]], %[[a]] : vector<16xf64>
+// CHECK-VEC2: vector.scatter %{{.*}}[%[[i1]], %[[c0]]] [%[[j]]], %[[mask]], %[[s]] : memref<33x64xf64>
+// CHECK-VEC2: }
+// CHECK-VEC2: }
+// CHECK-VEC2: return
+//
+func @add_dense(%arga: tensor<32x64xf64, #SparseMatrix>,
+ %argx: tensor<33x64xf64> {linalg.inplaceable = true}) -> tensor<33x64xf64> {
+ %0 = linalg.generic #trait_affine
+ ins(%arga: tensor<32x64xf64, #SparseMatrix>)
+ outs(%argx: tensor<33x64xf64>) {
+ ^bb(%a: f64, %x: f64):
+ %0 = addf %x, %a : f64
+ linalg.yield %0 : f64
+ } -> tensor<33x64xf64>
+ return %0 : tensor<33x64xf64>
+}