[Mlir-commits] [mlir] 69a7759 - [mlir][sparse] implement loop index value vectorization

Mon Mar 21 10:40:49 PDT 2022

Author: Aart Bik
Date: 2022-03-21T10:40:38-07:00
New Revision: 69a7759b402fd42c7ac3fc27ef35682001f305e5

URL: https://github.com/llvm/llvm-project/commit/69a7759b402fd42c7ac3fc27ef35682001f305e5
DIFF: https://github.com/llvm/llvm-project/commit/69a7759b402fd42c7ac3fc27ef35682001f305e5.diff

LOG: [mlir][sparse] implement loop index value vectorization

with CHECK and integration test

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D122040

Added: 
    mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir

Modified: 
    mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index d1fb285c17149..0ebd4e4b5cd2e 100644

--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -873,15 +873,35 @@ static Value genAddress(CodeGen &codegen, PatternRewriter &rewriter,
 }
 
 /// Generates an index value.
-static Value genIndexValue(Merger &merger, CodeGen &codegen, unsigned exp) {
-  assert(codegen.curVecLength == 1); // TODO: implement vectorization!
+static Value genIndexValue(Merger &merger, CodeGen &codegen,
+                           PatternRewriter &rewriter, unsigned exp,
+                           unsigned ldx) {
   unsigned idx = merger.exp(exp).index;
-  return codegen.loops[idx];
+  Value ival = codegen.loops[idx];
+  Type itype = ival.getType();
+  // During vectorization, the loop index value is one of the following:
+  // (1) already a vector, as in ... = ind[lo:hi], which is used as is, or
+  // (2) a scalar i, broadcast to [i, ...] plus [0, 1, ...] if idx == ldx.
+  unsigned vl = codegen.curVecLength;
+  if (vl > 1 && !itype.isa<VectorType>()) {
+    Location loc = ival.getLoc();
+    VectorType vtp = vectorType(codegen, itype);
+    ival = rewriter.create<vector::BroadcastOp>(loc, vtp, ival);
+    if (idx == ldx) {
+      SmallVector<APInt, 4> integers;
+      for (unsigned i = 0; i < vl; i++)
+        integers.push_back(APInt(/*width=*/64, i));
+      auto values = DenseElementsAttr::get(vtp, integers);
+      Value incr = rewriter.create<arith::ConstantOp>(loc, vtp, values);
+      ival = rewriter.create<arith::AddIOp>(loc, ival, incr);
+    }
+  }
+  return ival;
 }
 
 /// Recursively generates tensor expression.
 static Value genExp(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter,
-                    linalg::GenericOp op, unsigned exp) {
+                    linalg::GenericOp op, unsigned exp, unsigned ldx) {
   Location loc = op.getLoc();
   if (exp == -1u)
     return Value();
@@ -890,9 +910,11 @@ static Value genExp(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter,
   if (merger.exp(exp).kind == Kind::kInvariant)
     return genInvariantValue(merger, codegen, rewriter, exp);
   if (merger.exp(exp).kind == Kind::kIndex)
-    return genIndexValue(merger, codegen, exp);
-  Value v0 = genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e0);
-  Value v1 = genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e1);
+    return genIndexValue(merger, codegen, rewriter, exp, ldx);
+  Value v0 =
+      genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e0, ldx);
+  Value v1 =
+      genExp(merger, codegen, rewriter, op, merger.exp(exp).children.e1, ldx);
   return merger.buildExp(rewriter, loc, exp, v0, v1);
 }
 
@@ -1561,7 +1583,8 @@ static void genStmt(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter,
                     unsigned exp, unsigned at) {
   // At each leaf, assign remaining tensor (sub)expression to output tensor.
   if (at == topSort.size()) {
-    Value rhs = genExp(merger, codegen, rewriter, op, exp);
+    unsigned ldx = topSort[at - 1];
+    Value rhs = genExp(merger, codegen, rewriter, op, exp, ldx);
     genTensorStore(merger, codegen, rewriter, op, rhs);
     return;
   }
@@ -1645,7 +1668,6 @@ struct GenericOpSparsifier : public OpRewritePattern<linalg::GenericOp> {
 
   LogicalResult matchAndRewrite(linalg::GenericOp op,
                                 PatternRewriter &rewriter) const override {
-
     // Detects sparse annotations and translate the per-dimension sparsity
     // information for all tensors to loop indices in the kernel.
     assert(op.getNumOutputs() == 1);

diff  --git a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
new file mode 100644
index 0000000000000..253ac0cb37f49
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
@@ -0,0 +1,124 @@
+// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+// The script is designed to make adding checks to
+// a test case fast, it is *not* designed to be authoritative
+// about what constitutes a good test! The CHECK should be
+// minimized and named to reflect the test intent.
+
+// RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=8" -canonicalize | \
+// RUN:   FileCheck %s
+
+#SparseVector = #sparse_tensor.encoding<{
+  dimLevelType = ["compressed"]
+}>
+
+#trait_1d = {
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a
+    affine_map<(i) -> (i)>   // x (out)
+  ],
+  iterator_types = ["parallel"],
+  doc = "X(i) = a(i) op i"
+}
+
+// CHECK-LABEL:   func @sparse_index_1d_conj(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<8xi64> {
+// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant dense<0> : vector<8xi64>
+// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant dense<0> : vector<8xindex>
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 8 : index
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : i64
+// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_6]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_6]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
+// CHECK-DAG:       %[[VAL_10:.*]] = memref.alloc() : memref<8xi64>
+// CHECK-DAG:       linalg.fill ins(%[[VAL_5]] : i64) outs(%[[VAL_10]] : memref<8xi64>)
+// CHECK-DAG:       %[[VAL_11:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
+// CHECK-DAG:       %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
+// CHECK:           scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_3]] {
+// CHECK:             %[[VAL_14:.*]] = affine.min #map0(%[[VAL_13]]){{\[}}%[[VAL_12]]]
+// CHECK:             %[[VAL_15:.*]] = vector.create_mask %[[VAL_14]] : vector<8xi1>
+// CHECK:             %[[VAL_16:.*]] = vector.maskedload %[[VAL_8]]{{\[}}%[[VAL_13]]], %[[VAL_15]], %[[VAL_2]] : memref<?xindex>, vector<8xi1>, vector<8xindex> into vector<8xindex>
+// CHECK:             %[[VAL_17:.*]] = vector.maskedload %[[VAL_9]]{{\[}}%[[VAL_13]]], %[[VAL_15]], %[[VAL_1]] : memref<?xi64>, vector<8xi1>, vector<8xi64> into vector<8xi64>
+// CHECK:             %[[VAL_18:.*]] = arith.index_cast %[[VAL_16]] : vector<8xindex> to vector<8xi64>
+// CHECK:             %[[VAL_19:.*]] = arith.muli %[[VAL_17]], %[[VAL_18]] : vector<8xi64>
+// CHECK:             vector.scatter %[[VAL_10]]{{\[}}%[[VAL_6]]] {{\[}}%[[VAL_16]]], %[[VAL_15]], %[[VAL_19]] : memref<8xi64>, vector<8xindex>, vector<8xi1>, vector<8xi64>
+// CHECK:           }
+// CHECK:           %[[VAL_20:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<8xi64>
+// CHECK:           return %[[VAL_20]] : tensor<8xi64>
+// CHECK:         }
+func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> {
+  %init = linalg.init_tensor [8] : tensor<8xi64>
+  %r = linalg.generic #trait_1d
+      ins(%arga: tensor<8xi64, #SparseVector>)
+     outs(%init: tensor<8xi64>) {
+      ^bb(%a: i64, %x: i64):
+        %i = linalg.index 0 : index
+        %ii = arith.index_cast %i : index to i64
+        %m1 = arith.muli %a, %ii : i64
+        linalg.yield %m1 : i64
+  } -> tensor<8xi64>
+  return %r : tensor<8xi64>
+}
+
+// CHECK-LABEL:   func @sparse_index_1d_disj(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<8xi64> {
+// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7]> : vector<8xindex>
+// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : i64
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 8 : index
+// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi64>
+// CHECK-DAG:       %[[VAL_9:.*]] = memref.alloc() : memref<8xi64>
+// CHECK-DAG:       linalg.fill ins(%[[VAL_3]] : i64) outs(%[[VAL_9]] : memref<8xi64>)
+// CHECK-DAG:       %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
+// CHECK-DAG:       %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_2]]] : memref<?xindex>
+// CHECK:           %[[VAL_12:.*]]:2 = scf.while (%[[VAL_13:.*]] = %[[VAL_10]], %[[VAL_14:.*]] = %[[VAL_5]]) : (index, index) -> (index, index) {
+// CHECK:             %[[VAL_15:.*]] = arith.cmpi ult, %[[VAL_13]], %[[VAL_11]] : index
+// CHECK:             scf.condition(%[[VAL_15]]) %[[VAL_13]], %[[VAL_14]] : index, index
+// CHECK:           } do {
+// CHECK:           ^bb0(%[[VAL_16:.*]]: index, %[[VAL_17:.*]]: index):
+// CHECK:             %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref<?xindex>
+// CHECK:             %[[VAL_19:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_17]] : index
+// CHECK:             scf.if %[[VAL_19]] {
+// CHECK:               %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref<?xi64>
+// CHECK:               %[[VAL_21:.*]] = arith.index_cast %[[VAL_17]] : index to i64
+// CHECK:               %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_21]] : i64
+// CHECK:               memref.store %[[VAL_22]], %[[VAL_9]]{{\[}}%[[VAL_17]]] : memref<8xi64>
+// CHECK:             } else {
+// CHECK:               %[[VAL_23:.*]] = arith.index_cast %[[VAL_17]] : index to i64
+// CHECK:               memref.store %[[VAL_23]], %[[VAL_9]]{{\[}}%[[VAL_17]]] : memref<8xi64>
+// CHECK:             }
+// CHECK:             %[[VAL_24:.*]] = arith.cmpi eq, %[[VAL_18]], %[[VAL_17]] : index
+// CHECK:             %[[VAL_25:.*]] = arith.addi %[[VAL_16]], %[[VAL_2]] : index
+// CHECK:             %[[VAL_26:.*]] = arith.select %[[VAL_24]], %[[VAL_25]], %[[VAL_16]] : index
+// CHECK:             %[[VAL_27:.*]] = arith.addi %[[VAL_17]], %[[VAL_2]] : index
+// CHECK:             scf.yield %[[VAL_26]], %[[VAL_27]] : index, index
+// CHECK:           }
+// CHECK:           scf.for %[[VAL_28:.*]] = %[[VAL_29:.*]]#1 to %[[VAL_4]] step %[[VAL_4]] {
+// CHECK:             %[[VAL_30:.*]] = affine.min #map1(%[[VAL_28]])
+// CHECK:             %[[VAL_31:.*]] = vector.create_mask %[[VAL_30]] : vector<8xi1>
+// CHECK:             %[[VAL_32:.*]] = vector.broadcast %[[VAL_28]] : index to vector<8xindex>
+// CHECK:             %[[VAL_33:.*]] = arith.addi %[[VAL_32]], %[[VAL_1]] : vector<8xindex>
+// CHECK:             %[[VAL_34:.*]] = arith.index_cast %[[VAL_33]] : vector<8xindex> to vector<8xi64>
+// CHECK:             vector.maskedstore %[[VAL_9]]{{\[}}%[[VAL_28]]], %[[VAL_31]], %[[VAL_34]] : memref<8xi64>, vector<8xi1>, vector<8xi64>
+// CHECK:           }
+// CHECK:           %[[VAL_35:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<8xi64>
+// CHECK:           return %[[VAL_35]] : tensor<8xi64>
+// CHECK:         }
+func @sparse_index_1d_disj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> {
+  %init = linalg.init_tensor [8] : tensor<8xi64>
+  %r = linalg.generic #trait_1d
+      ins(%arga: tensor<8xi64, #SparseVector>)
+     outs(%init: tensor<8xi64>) {
+      ^bb(%a: i64, %x: i64):
+        %i = linalg.index 0 : index
+        %ii = arith.index_cast %i : index to i64
+        %m1 = arith.addi %a, %ii : i64
+        linalg.yield %m1 : i64
+  } -> tensor<8xi64>
+  return %r : tensor<8xi64>
+}

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
new file mode 100644
index 0000000000000..058a1f48b5bf6
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
@@ -0,0 +1,208 @@
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+//
+// Do the same run, but now with SIMDization as well. This should not change the outcome.
+//
+// RUN: mlir-opt %s --sparse-compiler="vectorization-strategy=2 vl=4" | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+#SparseVector = #sparse_tensor.encoding<{
+  dimLevelType = ["compressed"]
+}>
+
+#SparseMatrix = #sparse_tensor.encoding<{
+  dimLevelType = ["compressed", "compressed"]
+}>
+
+#trait_1d = {
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a
+    affine_map<(i) -> (i)>   // x (out)
+  ],
+  iterator_types = ["parallel"],
+  doc = "X(i) = a(i) op i"
+}
+
+#trait_2d = {
+  indexing_maps = [
+    affine_map<(i,j) -> (i,j)>,  // A
+    affine_map<(i,j) -> (i,j)>   // X (out)
+  ],
+  iterator_types = ["parallel", "parallel"],
+  doc = "X(i,j) = A(i,j) op i op j"
+}
+
+//
+// Test with indices and sparse inputs. All outputs are dense.
+//
+module {
+
+  //
+  // Kernel X(i) = a(i) * i that uses the loop index as a value (conjunction).
+  //
+  func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> {
+    %init = linalg.init_tensor [8] : tensor<8xi64>
+    %r = linalg.generic #trait_1d
+        ins(%arga: tensor<8xi64, #SparseVector>)
+       outs(%init: tensor<8xi64>) {
+        ^bb(%a: i64, %x: i64):
+          %i = linalg.index 0 : index
+          %ii = arith.index_cast %i : index to i64
+          %m1 = arith.muli %a, %ii : i64
+          linalg.yield %m1 : i64
+    } -> tensor<8xi64>
+    return %r : tensor<8xi64>
+  }
+
+  //
+  // Kernel X(i) = a(i) + i that uses the loop index as a value (disjunction).
+  //
+  func @sparse_index_1d_disj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8xi64> {
+    %init = linalg.init_tensor [8] : tensor<8xi64>
+    %r = linalg.generic #trait_1d
+        ins(%arga: tensor<8xi64, #SparseVector>)
+       outs(%init: tensor<8xi64>) {
+        ^bb(%a: i64, %x: i64):
+          %i = linalg.index 0 : index
+          %ii = arith.index_cast %i : index to i64
+          %m1 = arith.addi %a, %ii : i64
+          linalg.yield %m1 : i64
+    } -> tensor<8xi64>
+    return %r : tensor<8xi64>
+  }
+
+  //
+  // Kernel X(i,j) = A(i,j) * i * j that uses loop indices as values (conjunction).
+  //
+  func @sparse_index_2d_conj(%arga: tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64> {
+    %init = linalg.init_tensor [3,4] : tensor<3x4xi64>
+    %r = linalg.generic #trait_2d
+        ins(%arga: tensor<3x4xi64, #SparseMatrix>)
+       outs(%init: tensor<3x4xi64>) {
+        ^bb(%a: i64, %x: i64):
+          %i = linalg.index 0 : index
+          %j = linalg.index 1 : index
+          %ii = arith.index_cast %i : index to i64
+          %jj = arith.index_cast %j : index to i64
+          %m1 = arith.muli %ii, %a : i64
+          %m2 = arith.muli %jj, %m1 : i64
+          linalg.yield %m2 : i64
+    } -> tensor<3x4xi64>
+    return %r : tensor<3x4xi64>
+  }
+
+  //
+  // Kernel X(i,j) = A(i,j) + i + j that uses loop indices as values (disjunction).
+  //
+  func @sparse_index_2d_disj(%arga: tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64> {
+    %init = linalg.init_tensor [3,4] : tensor<3x4xi64>
+    %r = linalg.generic #trait_2d
+        ins(%arga: tensor<3x4xi64, #SparseMatrix>)
+       outs(%init: tensor<3x4xi64>) {
+        ^bb(%a: i64, %x: i64):
+          %i = linalg.index 0 : index
+          %j = linalg.index 1 : index
+          %ii = arith.index_cast %i : index to i64
+          %jj = arith.index_cast %j : index to i64
+          %m1 = arith.addi %ii, %a : i64
+          %m2 = arith.addi %jj, %m1 : i64
+          linalg.yield %m2 : i64
+    } -> tensor<3x4xi64>
+    return %r : tensor<3x4xi64>
+  }
+
+  //
+  // Main driver.
+  //
+  func @entry() {
+    %c0 = arith.constant 0 : index
+    %du = arith.constant -1 : i64
+
+    // Setup input sparse vector.
+    %v1 = arith.constant sparse<[[2], [4]], [ 10, 20]> : tensor<8xi64>
+    %sv = sparse_tensor.convert %v1 : tensor<8xi64> to tensor<8xi64, #SparseVector>
+
+    // Setup input "sparse" vector (all entries nonzero, stored in sparse format).
+    %v2 = arith.constant dense<[ 1,  2,  4,  8,  16,  32,  64,  128 ]> : tensor<8xi64>
+    %dv = sparse_tensor.convert %v2 : tensor<8xi64> to tensor<8xi64, #SparseVector>
+
+    // Setup input sparse matrix.
+    %m1 = arith.constant sparse<[[1,1], [2,3]], [10, 20]> : tensor<3x4xi64>
+    %sm = sparse_tensor.convert %m1 : tensor<3x4xi64> to tensor<3x4xi64, #SparseMatrix>
+
+    // Setup input "sparse" matrix (all entries nonzero, stored in sparse format).
+    %m2 = arith.constant dense <[ [ 1,  1,  1,  1 ],
+                                  [ 1,  2,  1,  1 ],
+                                  [ 1,  1,  3,  4 ] ]> : tensor<3x4xi64>
+    %dm = sparse_tensor.convert %m2 : tensor<3x4xi64> to tensor<3x4xi64, #SparseMatrix>
+
+    // Call the kernels.
+    %0 = call @sparse_index_1d_conj(%sv) : (tensor<8xi64, #SparseVector>) -> tensor<8xi64>
+    %1 = call @sparse_index_1d_disj(%sv) : (tensor<8xi64, #SparseVector>) -> tensor<8xi64>
+    %2 = call @sparse_index_1d_conj(%dv) : (tensor<8xi64, #SparseVector>) -> tensor<8xi64>
+    %3 = call @sparse_index_1d_disj(%dv) : (tensor<8xi64, #SparseVector>) -> tensor<8xi64>
+    %4 = call @sparse_index_2d_conj(%sm) : (tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64>
+    %5 = call @sparse_index_2d_disj(%sm) : (tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64>
+    %6 = call @sparse_index_2d_conj(%dm) : (tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64>
+    %7 = call @sparse_index_2d_disj(%dm) : (tensor<3x4xi64, #SparseMatrix>) -> tensor<3x4xi64>
+
+    // Get the backing buffers.
+    %mem0 = bufferization.to_memref %0 : memref<8xi64>
+    %mem1 = bufferization.to_memref %1 : memref<8xi64>
+    %mem2 = bufferization.to_memref %2 : memref<8xi64>
+    %mem3 = bufferization.to_memref %3 : memref<8xi64>
+    %mem4 = bufferization.to_memref %4 : memref<3x4xi64>
+    %mem5 = bufferization.to_memref %5 : memref<3x4xi64>
+    %mem6 = bufferization.to_memref %6 : memref<3x4xi64>
+    %mem7 = bufferization.to_memref %7 : memref<3x4xi64>
+
+    //
+    // Verify result.
+    //
+    // CHECK:      ( 0, 0, 20, 0, 80, 0, 0, 0 )
+    // CHECK-NEXT: ( 0, 1, 12, 3, 24, 5, 6, 7 )
+    // CHECK-NEXT: ( 0, 2, 8, 24, 64, 160, 384, 896 )
+    // CHECK-NEXT: ( 1, 3, 6, 11, 20, 37, 70, 135 )
+    // CHECK-NEXT: ( ( 0, 0, 0, 0 ), ( 0, 10, 0, 0 ), ( 0, 0, 0, 120 ) )
+    // CHECK-NEXT: ( ( 0, 1, 2, 3 ), ( 1, 12, 3, 4 ), ( 2, 3, 4, 25 ) )
+    // CHECK-NEXT: ( ( 0, 0, 0, 0 ), ( 0, 2, 2, 3 ), ( 0, 2, 12, 24 ) )
+    // CHECK-NEXT: ( ( 1, 2, 3, 4 ), ( 2, 4, 4, 5 ), ( 3, 4, 7, 9 ) )
+    //
+    %vv0 = vector.transfer_read %mem0[%c0], %du: memref<8xi64>, vector<8xi64>
+    %vv1 = vector.transfer_read %mem1[%c0], %du: memref<8xi64>, vector<8xi64>
+    %vv2 = vector.transfer_read %mem2[%c0], %du: memref<8xi64>, vector<8xi64>
+    %vv3 = vector.transfer_read %mem3[%c0], %du: memref<8xi64>, vector<8xi64>
+    %vv4 = vector.transfer_read %mem4[%c0,%c0], %du: memref<3x4xi64>, vector<3x4xi64>
+    %vv5 = vector.transfer_read %mem5[%c0,%c0], %du: memref<3x4xi64>, vector<3x4xi64>
+    %vv6 = vector.transfer_read %mem6[%c0,%c0], %du: memref<3x4xi64>, vector<3x4xi64>
+    %vv7 = vector.transfer_read %mem7[%c0,%c0], %du: memref<3x4xi64>, vector<3x4xi64>
+    vector.print %vv0 : vector<8xi64>
+    vector.print %vv1 : vector<8xi64>
+    vector.print %vv2 : vector<8xi64>
+    vector.print %vv3 : vector<8xi64>
+    vector.print %vv4 : vector<3x4xi64>
+    vector.print %vv5 : vector<3x4xi64>
+    vector.print %vv6 : vector<3x4xi64>
+    vector.print %vv7 : vector<3x4xi64>
+
+    // Release resources.
+    sparse_tensor.release %sv : tensor<8xi64, #SparseVector>
+    sparse_tensor.release %dv : tensor<8xi64, #SparseVector>
+    sparse_tensor.release %sm : tensor<3x4xi64, #SparseMatrix>
+    sparse_tensor.release %dm : tensor<3x4xi64, #SparseMatrix>
+    memref.dealloc %mem0 : memref<8xi64>
+    memref.dealloc %mem1 : memref<8xi64>
+    memref.dealloc %mem2 : memref<8xi64>
+    memref.dealloc %mem3 : memref<8xi64>
+    memref.dealloc %mem4 : memref<3x4xi64>
+    memref.dealloc %mem5 : memref<3x4xi64>
+    memref.dealloc %mem6 : memref<3x4xi64>
+    memref.dealloc %mem7 : memref<3x4xi64>
+
+    return
+  }
+}