[Mlir-commits] [clang] [compiler-rt] [libc] [lldb] [llvm] [mlir] [mlir][sparse] Finish migrating integration tests to use sparse_tensor.print (PR #84997)

Tue Mar 12 17:45:50 PDT 2024

https://github.com/llvmgnsyncbot updated https://github.com/llvm/llvm-project/pull/84997

>From 94e27c265a9aeb3659175ecee81a68d1763e0180 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at google.com>
Date: Tue, 12 Mar 2024 16:59:17 -0700
Subject: [PATCH 01/10] =?UTF-8?q?[mlir][sparse]=20reuse=20tensor.insert=20?=
 =?UTF-8?q?operation=20to=20insert=20elements=20into=20=E2=80=A6=20(#84987?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…a sparse tensor.
---
 .../SparseTensor/IR/SparseTensorOps.td        | 42 -------------------
 .../SparseTensor/IR/SparseTensorDialect.cpp   |  9 ----
 .../BufferizableOpInterfaceImpl.cpp           | 22 ----------
 .../Transforms/SparseReinterpretMap.cpp       |  4 +-
 .../Transforms/SparseTensorCodegen.cpp        | 19 +++++----
 .../Transforms/SparseTensorConversion.cpp     | 21 ++++++----
 .../Transforms/SparseTensorRewriting.cpp      |  6 ++-
 .../Transforms/Sparsification.cpp             |  5 ++-
 mlir/test/Dialect/SparseTensor/codegen.mlir   |  6 +--
 .../SparseTensor/constant_index_map.mlir      |  2 +-
 .../test/Dialect/SparseTensor/conversion.mlir |  2 +-
 mlir/test/Dialect/SparseTensor/invalid.mlir   | 18 --------
 mlir/test/Dialect/SparseTensor/roundtrip.mlir |  4 +-
 mlir/test/Dialect/SparseTensor/sparse_2d.mlir | 10 ++---
 .../SparseTensor/sparse_broadcast.mlir        |  2 +-
 .../sparse_conv_2d_slice_based.mlir           |  2 +-
 .../Dialect/SparseTensor/sparse_fp_ops.mlir   |  4 +-
 .../Dialect/SparseTensor/sparse_index.mlir    |  2 +-
 .../test/Dialect/SparseTensor/sparse_out.mlir |  4 +-
 .../SparseTensor/sparse_reinterpret_map.mlir  |  2 +-
 .../Dialect/SparseTensor/sparse_reshape.mlir  |  8 ++--
 .../SparseTensor/sparse_tensor_reshape.mlir   |  2 +-
 .../SparseTensor/sparse_transpose.mlir        |  2 +-
 .../SparseTensor/CPU/sparse_insert_1d.mlir    | 10 ++---
 .../SparseTensor/CPU/sparse_insert_2d.mlir    | 40 +++++++++---------
 .../SparseTensor/CPU/sparse_insert_3d.mlir    | 40 +++++++++---------
 26 files changed, 106 insertions(+), 182 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index feed15d6af0544..0498576fcffc51 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -668,48 +668,6 @@ def SparseTensor_CrdTranslateOp : SparseTensor_Op<"crd_translate", [Pure]>,
 // refined over time as our sparse abstractions evolve.
 //===----------------------------------------------------------------------===//
 
-def SparseTensor_InsertOp : SparseTensor_Op<"insert",
-    [TypesMatchWith<"value type matches element type of tensor",
-                    "tensor", "value",
-                    "::llvm::cast<ShapedType>($_self).getElementType()">,
-     AllTypesMatch<["tensor", "result"]>]>,
-    Arguments<(ins AnyType:$value,
-               AnySparseTensor:$tensor,
-               Variadic<Index>:$lvlCoords)>,
-    Results<(outs AnySparseTensor:$result)> {
-  string summary = "Inserts a value into the sparse tensor";
-  string description = [{
-    Inserts the value into the underlying storage of the tensor at the
-    given level-coordinates. The arity of `lvlCoords` must match the
-    level-rank of the tensor. This operation can only be applied when
-    the tensor materializes unintialized from a `tensor.empty` operation
-    and the final tensor is constructed with a `load` operation which
-    has the `hasInserts` attribute set.
-
-    The level-properties of the sparse tensor type fully describe what
-    kind of insertion order is allowed.  When all levels have "unique"
-    and "ordered" properties, for example, insertions should occur in
-    strict lexicographical level-coordinate order.  Other properties
-    define different insertion regimens.  Inserting in a way contrary
-    to these properties results in undefined behavior.
-
-    Note that this operation is "impure" in the sense that even though
-    the result is modeled through an SSA value, the insertion is eventually
-    done "in place", and referencing the old SSA value is undefined behavior.
-    This operation is scheduled to be unified with the dense counterpart
-    `tensor.insert` that has pure SSA semantics.
-
-    Example:
-
-    ```mlir
-    %result = sparse_tensor.insert %val into %tensor[%i,%j] : tensor<1024x1024xf64, #CSR>
-    ```
-  }];
-  let assemblyFormat = "$value `into` $tensor `[` $lvlCoords `]` attr-dict"
-                       "`:` type($tensor)";
-  let hasVerifier = 1;
-}
-
 def SparseTensor_PushBackOp : SparseTensor_Op<"push_back",
     [TypesMatchWith<"value type matches element type of inBuffer",
                     "inBuffer", "value",
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index c19907a945d3bb..7750efdd9add0f 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -1741,15 +1741,6 @@ LogicalResult ConcatenateOp::verify() {
   return success();
 }
 
-LogicalResult InsertOp::verify() {
-  const auto stt = getSparseTensorType(getTensor());
-  if (stt.getEncoding().getBatchLvlRank() > 0)
-    return emitOpError("batched sparse tensor insertion not implemented");
-  if (stt.getLvlRank() != static_cast<Level>(getLvlCoords().size()))
-    return emitOpError("incorrect number of coordinates");
-  return success();
-}
-
 void PushBackOp::build(OpBuilder &builder, OperationState &result,
                        Value curSize, Value inBuffer, Value value) {
   build(builder, result, curSize, inBuffer, value, Value());
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/BufferizableOpInterfaceImpl.cpp
index 3f4ae1f67de150..a942a721e218fe 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -187,27 +187,6 @@ struct DisassembleOpInterface
   }
 };
 
-struct InsertOpInterface : public SparseBufferizableOpInterfaceExternalModel<
-                               InsertOpInterface, sparse_tensor::InsertOp> {
-  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
-                              const AnalysisState &state) const {
-    return true;
-  }
-
-  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
-                               const AnalysisState &state) const {
-    // InsertOp writes to memory.
-    return true;
-  }
-
-  AliasingValueList getAliasingValues(Operation *op, OpOperand &opOperand,
-                                      const AnalysisState &state) const {
-    // InsertOp returns an alias of its operand.
-    assert(op->getNumResults() == 1);
-    return {{op->getOpResult(0), BufferRelation::Equivalent}};
-  }
-};
-
 struct NumberOfEntriesOpInterface
     : public SparseBufferizableOpInterfaceExternalModel<
           NumberOfEntriesOpInterface, sparse_tensor::NumberOfEntriesOp> {
@@ -324,7 +303,6 @@ void mlir::sparse_tensor::registerBufferizableOpInterfaceExternalModels(
     sparse_tensor::ConvertOp::attachInterface<ConvertOpInterface>(*ctx);
     sparse_tensor::LoadOp::attachInterface<LoadOpInterface>(*ctx);
     sparse_tensor::NewOp::attachInterface<NewOpInterface>(*ctx);
-    sparse_tensor::InsertOp::attachInterface<InsertOpInterface>(*ctx);
     sparse_tensor::NumberOfEntriesOp::attachInterface<
         NumberOfEntriesOpInterface>(*ctx);
     sparse_tensor::AssembleOp::attachInterface<AssembleOpInterface>(*ctx);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp
index fbe2fc31ab8b15..f93b59de29e57b 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp
@@ -640,14 +640,14 @@ struct TensorInsertDemapper
   using DemapInsRewriter::DemapInsRewriter;
   LogicalResult rewriteOp(tensor::InsertOp op, OpAdaptor adaptor,
                           PatternRewriter &rewriter) const {
-    if (!hasAnySparseResult(op))
+    if (!hasAnySparseResult(op) || !hasAnyNonIdentityOperandsOrResults(op))
       return failure();
 
     Location loc = op.getLoc();
     auto stt = getSparseTensorType(op.getResult());
     ValueRange lvlCrd = stt.translateCrds(rewriter, loc, op.getIndices(),
                                           CrdTransDirectionKind::dim2lvl);
-    auto insertOp = rewriter.create<sparse_tensor::InsertOp>(
+    auto insertOp = rewriter.create<tensor::InsertOp>(
         loc, op.getScalar(), adaptor.getDest(), lvlCrd);
 
     Value out = genRemap(rewriter, stt.getEncoding(), insertOp.getResult());
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
index 44c5d4dbe485bf..7ff2fc25328a6c 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -1014,24 +1014,29 @@ class SparseCompressConverter : public OpConversionPattern<CompressOp> {
 };
 
 /// Sparse codegen rule for the insert operator.
-class SparseInsertConverter : public OpConversionPattern<InsertOp> {
+class SparseInsertConverter : public OpConversionPattern<tensor::InsertOp> {
 public:
   using OpConversionPattern::OpConversionPattern;
   LogicalResult
-  matchAndRewrite(InsertOp op, OpAdaptor adaptor,
+  matchAndRewrite(tensor::InsertOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
+    auto stt = getSparseTensorType(adaptor.getDest());
+    if (!stt.hasEncoding())
+      return failure();
+    assert(stt.isIdentity() && "Run reinterpret-map before conversion.");
+
     Location loc = op.getLoc();
-    auto desc = getDescriptorFromTensorTuple(adaptor.getTensor());
+    auto desc = getDescriptorFromTensorTuple(adaptor.getDest());
     TypeRange flatSpTensorTps = desc.getFields().getTypes();
     SmallVector<Value> params = llvm::to_vector(desc.getFields());
-    params.append(adaptor.getLvlCoords().begin(), adaptor.getLvlCoords().end());
-    params.push_back(adaptor.getValue());
-    SparseInsertGenerator insertGen(op.getTensor().getType(), flatSpTensorTps,
+    params.append(adaptor.getIndices().begin(), adaptor.getIndices().end());
+    params.push_back(adaptor.getScalar());
+    SparseInsertGenerator insertGen(op.getDest().getType(), flatSpTensorTps,
                                     params, /*genCall=*/true);
     SmallVector<Value> ret = insertGen.genCallOrInline(rewriter, loc);
     // Replace operation with resulting memrefs.
     rewriter.replaceOp(op,
-                       genTuple(rewriter, loc, op.getTensor().getType(), ret));
+                       genTuple(rewriter, loc, op.getDest().getType(), ret));
     return success();
   }
 };
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 010c3aa58b72c7..0937c10f257283 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -580,17 +580,24 @@ class SparseTensorLoadConverter : public OpConversionPattern<LoadOp> {
 };
 
 /// Sparse conversion rule for the insertion operator.
-class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
+class SparseTensorInsertConverter
+    : public OpConversionPattern<tensor::InsertOp> {
 public:
   using OpConversionPattern::OpConversionPattern;
   LogicalResult
-  matchAndRewrite(InsertOp op, OpAdaptor adaptor,
+  matchAndRewrite(tensor::InsertOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     // Note that the current regime only allows for strict lexicographic
     // coordinate order. All values are passed by reference through stack
     // allocated memrefs.
     Location loc = op->getLoc();
-    const auto stt = getSparseTensorType(op.getTensor());
+    const auto stt = getSparseTensorType(op.getDest());
+
+    // Dense tensor insertion.
+    if (!stt.hasEncoding())
+      return failure();
+
+    assert(stt.isIdentity() && "Run reinterpret-map before conversion.");
     const auto elemTp = stt.getElementType();
     const Level lvlRank = stt.getLvlRank();
     Value lvlCoords, vref;
@@ -608,12 +615,12 @@ class SparseTensorInsertConverter : public OpConversionPattern<InsertOp> {
       lvlCoords = genAlloca(rewriter, loc, lvlRank, rewriter.getIndexType());
       vref = genAllocaScalar(rewriter, loc, elemTp);
     }
-    storeAll(rewriter, loc, lvlCoords, adaptor.getLvlCoords());
-    rewriter.create<memref::StoreOp>(loc, adaptor.getValue(), vref);
+    storeAll(rewriter, loc, lvlCoords, adaptor.getIndices());
+    rewriter.create<memref::StoreOp>(loc, adaptor.getScalar(), vref);
     SmallString<12> name{"lexInsert", primaryTypeFunctionSuffix(elemTp)};
     createFuncCall(rewriter, loc, name, {},
-                   {adaptor.getTensor(), lvlCoords, vref}, EmitCInterface::On);
-    rewriter.replaceOp(op, adaptor.getTensor());
+                   {adaptor.getDest(), lvlCoords, vref}, EmitCInterface::On);
+    rewriter.replaceOp(op, adaptor.getDest());
     return success();
   }
 };
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index a65bce78d095cf..17f70d0796ccfc 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -817,7 +817,8 @@ struct TensorReshapeRewriter : public OpRewritePattern<tensor::ReshapeOp> {
           reshapeCvs(builder, loc, expandReass, collapsedSizes, collapsedDcvs,
                      dstSizes, dstDcvs);
 
-          auto t = builder.create<InsertOp>(loc, v, reduc.front(), dstDcvs);
+          auto t =
+              builder.create<tensor::InsertOp>(loc, v, reduc.front(), dstDcvs);
           builder.create<sparse_tensor::YieldOp>(loc, t);
         });
 
@@ -901,7 +902,8 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern<ReshapeOp> {
           SmallVector<Value> dstDcvs;
           reshapeCvs(builder, loc, op.getReassociationIndices(), srcSizes,
                      srcDcvs, dstSizes, dstDcvs);
-          auto t = builder.create<InsertOp>(loc, v, reduc.front(), dstDcvs);
+          auto t =
+              builder.create<tensor::InsertOp>(loc, v, reduc.front(), dstDcvs);
           builder.create<sparse_tensor::YieldOp>(loc, t);
         });
 
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index 1fb70ed5035c03..cd046b670d9a8e 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -428,7 +428,7 @@ static void genInsertionStore(CodegenEnv &env, OpBuilder &builder, OpOperand *t,
           /*else=*/true);
       // True branch.
       builder.setInsertionPointToStart(ifValidLexInsert.thenBlock());
-      Value res = builder.create<InsertOp>(loc, rhs, chain, ivs);
+      Value res = builder.create<tensor::InsertOp>(loc, rhs, chain, ivs);
       builder.create<scf::YieldOp>(loc, res);
       // False branch.
       builder.setInsertionPointToStart(ifValidLexInsert.elseBlock());
@@ -438,7 +438,8 @@ static void genInsertionStore(CodegenEnv &env, OpBuilder &builder, OpOperand *t,
       env.updateInsertionChain(ifValidLexInsert.getResult(0));
     } else {
       // Generates regular insertion chain.
-      env.updateInsertionChain(builder.create<InsertOp>(loc, rhs, chain, ivs));
+      env.updateInsertionChain(
+          builder.create<tensor::InsertOp>(loc, rhs, chain, ivs));
     }
     return;
   }
diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir
index b63762485c961f..40bfa1e4e2a501 100644
--- a/mlir/test/Dialect/SparseTensor/codegen.mlir
+++ b/mlir/test/Dialect/SparseTensor/codegen.mlir
@@ -643,7 +643,7 @@ func.func @sparse_compression_unordered(%tensor: tensor<8x8xf64, #UCSR>,
 //       CHECK: %[[R:.*]]:4 = call @_insert_compressed_128_f64_0_0(%[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]])
 //       CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3
 func.func @sparse_insert(%arg0: tensor<128xf64, #SV>, %arg1: index, %arg2: f64) -> tensor<128xf64, #SV> {
-  %0 = sparse_tensor.insert %arg2 into %arg0[%arg1] : tensor<128xf64, #SV>
+  %0 = tensor.insert %arg2 into %arg0[%arg1] : tensor<128xf64, #SV>
   %1 = sparse_tensor.load %0 hasInserts : tensor<128xf64, #SV>
   return %1 : tensor<128xf64, #SV>
 }
@@ -666,7 +666,7 @@ func.func @sparse_insert(%arg0: tensor<128xf64, #SV>, %arg1: index, %arg2: f64)
 //       CHECK: %[[R:.*]]:4 = call @_insert_compressed_128_f64_64_32(%[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]])
 //       CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3
 func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: index, %arg2: f64) -> tensor<128xf64, #SparseVector> {
-  %0 = sparse_tensor.insert %arg2 into %arg0[%arg1] : tensor<128xf64, #SparseVector>
+  %0 = tensor.insert %arg2 into %arg0[%arg1] : tensor<128xf64, #SparseVector>
   %1 = sparse_tensor.load %0 hasInserts : tensor<128xf64, #SparseVector>
   return %1 : tensor<128xf64, #SparseVector>
 }
@@ -690,7 +690,7 @@ func.func @sparse_insert_typed(%arg0: tensor<128xf64, #SparseVector>, %arg1: ind
 //       CHECK: %[[R:.*]]:4 = call @_insert_compressed_nonunique_singleton_5_6_f64_0_0(%[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A4]], %[[A5]])
 //       CHECK: return %[[R]]#0, %[[R]]#1, %[[R]]#2, %[[R]]#3
 func.func @sparse_insert_coo(%arg0: tensor<5x6xf64, #Coo>, %arg1: index, %arg2: f64) -> tensor<5x6xf64, #Coo> {
-  %0 = sparse_tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo>
+  %0 = tensor.insert %arg2 into %arg0[%arg1, %arg1] : tensor<5x6xf64, #Coo>
   %1 = sparse_tensor.load %0 hasInserts : tensor<5x6xf64, #Coo>
   return %1 : tensor<5x6xf64, #Coo>
 }
diff --git a/mlir/test/Dialect/SparseTensor/constant_index_map.mlir b/mlir/test/Dialect/SparseTensor/constant_index_map.mlir
index eaef6a31585292..f9559ce648c783 100644
--- a/mlir/test/Dialect/SparseTensor/constant_index_map.mlir
+++ b/mlir/test/Dialect/SparseTensor/constant_index_map.mlir
@@ -20,7 +20,7 @@
 // CHECK:             %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]], %[[VAL_9]]] : memref<1x77xi1>
 // CHECK:             %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_3]], %[[VAL_9]]] : memref<1x77xi1>
 // CHECK:             %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i1
-// CHECK:             %[[VAL_14:.*]] = sparse_tensor.insert %[[VAL_13]] into %[[VAL_10]]{{\[}}%[[VAL_9]]] : tensor<77xi1, #{{.*}}>
+// CHECK:             %[[VAL_14:.*]] = tensor.insert %[[VAL_13]] into %[[VAL_10]]{{\[}}%[[VAL_9]]] : tensor<77xi1, #{{.*}}>
 // CHECK:             scf.yield %[[VAL_14]] : tensor<77xi1, #{{.*}}>
 // CHECK:           }
 // CHECK:           %[[VAL_15:.*]] = sparse_tensor.load %[[VAL_16:.*]] hasInserts : tensor<77xi1, #{{.*}}>
diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir
index 465f2108626606..f23f6ac4f181e2 100644
--- a/mlir/test/Dialect/SparseTensor/conversion.mlir
+++ b/mlir/test/Dialect/SparseTensor/conversion.mlir
@@ -318,7 +318,7 @@ func.func @sparse_reconstruct_ins(%arg0: tensor<128xf32, #SparseVector>) -> tens
 func.func @sparse_insert(%arg0: tensor<128xf32, #SparseVector>,
                          %arg1: index,
                          %arg2: f32) -> tensor<128xf32, #SparseVector> {
-  %0 = sparse_tensor.insert %arg2 into %arg0[%arg1] : tensor<128xf32, #SparseVector>
+  %0 = tensor.insert %arg2 into %arg0[%arg1] : tensor<128xf32, #SparseVector>
   return %0 : tensor<128xf32, #SparseVector>
 }
 
diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
index eac97f702f58bd..48f28ef390ed53 100644
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -290,24 +290,6 @@ func.func @sparse_unannotated_load(%arg0: tensor<16x32xf64>) -> tensor<16x32xf64
 
 // -----
 
-func.func @sparse_unannotated_insert(%arg0: tensor<128xf64>, %arg1: index, %arg2: f64) {
-  // expected-error at +1 {{'sparse_tensor.insert' 'tensor' must be sparse tensor of any type values, but got 'tensor<128xf64>'}}
-  sparse_tensor.insert %arg2 into %arg0[%arg1] : tensor<128xf64>
-  return
-}
-
-// -----
-
-#CSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense, d1 : compressed)}>
-
-func.func @sparse_wrong_arity_insert(%arg0: tensor<128x64xf64, #CSR>, %arg1: index, %arg2: f64) {
-  // expected-error at +1 {{'sparse_tensor.insert' op incorrect number of coordinates}}
-  sparse_tensor.insert %arg2 into %arg0[%arg1] : tensor<128x64xf64, #CSR>
-  return
-}
-
-// -----
-
 func.func @sparse_push_back(%arg0: index, %arg1: memref<?xf64>, %arg2: f32) -> (memref<?xf64>, index) {
   // expected-error at +1 {{'sparse_tensor.push_back' op failed to verify that value type matches element type of inBuffer}}
   %0:2 = sparse_tensor.push_back %arg0, %arg1, %arg2 : index, memref<?xf64>, f32
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
index 41094fbad9218f..e9e458e805ba47 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -311,10 +311,10 @@ func.func @sparse_load_ins(%arg0: tensor<16x32xf64, #DenseMatrix>) -> tensor<16x
 //  CHECK-SAME: %[[A:.*]]: tensor<128xf64, #sparse{{[0-9]*}}>,
 //  CHECK-SAME: %[[B:.*]]: index,
 //  CHECK-SAME: %[[C:.*]]: f64)
-//       CHECK: %[[T:.*]] = sparse_tensor.insert %[[C]] into %[[A]][%[[B]]] : tensor<128xf64, #{{.*}}>
+//       CHECK: %[[T:.*]] = tensor.insert %[[C]] into %[[A]][%[[B]]] : tensor<128xf64, #{{.*}}>
 //       CHECK: return %[[T]] : tensor<128xf64, #{{.*}}>
 func.func @sparse_insert(%arg0: tensor<128xf64, #SparseVector>, %arg1: index, %arg2: f64) -> tensor<128xf64, #SparseVector> {
-  %0 = sparse_tensor.insert %arg2 into %arg0[%arg1] : tensor<128xf64, #SparseVector>
+  %0 = tensor.insert %arg2 into %arg0[%arg1] : tensor<128xf64, #SparseVector>
   return %0 : tensor<128xf64, #SparseVector>
 }
 
diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
index 85ae0db916899e..4afa0a8ceccd4b 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
@@ -1090,20 +1090,20 @@ func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
 // CHECK:                 %[[VAL_41:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_30]]] : memref<?xf64>
 // CHECK:                 %[[VAL_42:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_31]]] : memref<?xf64>
 // CHECK:                 %[[VAL_43:.*]] = arith.subf %[[VAL_41]], %[[VAL_42]] : f64
-// CHECK:                 %[[VAL_44:.*]] = sparse_tensor.insert %[[VAL_43]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK:                 %[[VAL_44:.*]] = tensor.insert %[[VAL_43]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
 // CHECK:                 scf.yield %[[VAL_44]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
 // CHECK:               } else {
 // CHECK:                 %[[VAL_45:.*]] = arith.cmpi eq, %[[VAL_33]], %[[VAL_36]] : index
 // CHECK:                 %[[VAL_46:.*]] = scf.if %[[VAL_45]] -> (tensor<2x3xf64, #sparse{{[0-9]*}}>) {
 // CHECK:                   %[[VAL_47:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_30]]] : memref<?xf64>
-// CHECK:                   %[[VAL_48:.*]] = sparse_tensor.insert %[[VAL_47]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK:                   %[[VAL_48:.*]] = tensor.insert %[[VAL_47]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
 // CHECK:                   scf.yield %[[VAL_48]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
 // CHECK:                 } else {
 // CHECK:                   %[[VAL_49:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_36]] : index
 // CHECK:                   %[[VAL_50:.*]] = scf.if %[[VAL_49]] -> (tensor<2x3xf64, #sparse{{[0-9]*}}>) {
 // CHECK:                     %[[VAL_51:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_31]]] : memref<?xf64>
 // CHECK:                     %[[VAL_52:.*]] = arith.negf %[[VAL_51]] : f64
-// CHECK:                     %[[VAL_53:.*]] = sparse_tensor.insert %[[VAL_52]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK:                     %[[VAL_53:.*]] = tensor.insert %[[VAL_52]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
 // CHECK:                     scf.yield %[[VAL_53]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
 // CHECK:                   } else {
 // CHECK:                     scf.yield %[[VAL_32]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
@@ -1123,14 +1123,14 @@ func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T
 // CHECK:             %[[VAL_63:.*]] = scf.for %[[VAL_64:.*]] = %[[VAL_65:.*]]#0 to %[[VAL_18]] step %[[VAL_4]] iter_args(%[[VAL_66:.*]] = %[[VAL_65]]#2)
 // CHECK:               %[[VAL_67:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_64]]] : memref<?xindex>
 // CHECK:               %[[VAL_68:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_64]]] : memref<?xf64>
-// CHECK:               %[[VAL_69:.*]] = sparse_tensor.insert %[[VAL_68]] into %[[VAL_66]]{{\[}}%[[VAL_13]], %[[VAL_67]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK:               %[[VAL_69:.*]] = tensor.insert %[[VAL_68]] into %[[VAL_66]]{{\[}}%[[VAL_13]], %[[VAL_67]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
 // CHECK:               scf.yield %[[VAL_69]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
 // CHECK:             }
 // CHECK:             %[[VAL_70:.*]] = scf.for %[[VAL_71:.*]] = %[[VAL_72:.*]]#1 to %[[VAL_22]] step %[[VAL_4]] iter_args(%[[VAL_73:.*]] = %[[VAL_74:.*]])
 // CHECK:               %[[VAL_75:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_71]]] : memref<?xindex>
 // CHECK:               %[[VAL_76:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_71]]] : memref<?xf64>
 // CHECK:               %[[VAL_77:.*]] = arith.negf %[[VAL_76]] : f64
-// CHECK:               %[[VAL_78:.*]] = sparse_tensor.insert %[[VAL_77]] into %[[VAL_73]]{{\[}}%[[VAL_13]], %[[VAL_75]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
+// CHECK:               %[[VAL_78:.*]] = tensor.insert %[[VAL_77]] into %[[VAL_73]]{{\[}}%[[VAL_13]], %[[VAL_75]]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
 // CHECK:               scf.yield %[[VAL_78]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
 // CHECK:             }
 // CHECK:             scf.yield %[[VAL_79:.*]] : tensor<2x3xf64, #sparse{{[0-9]*}}>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir b/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir
index 278450fabd74ec..a409329700ffd7 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir
@@ -33,7 +33,7 @@
 //       CHECK:      %[[L2:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_10]] to %[[TMP_12]] step %[[TMP_c1]] {{.*}} {
 //       CHECK:        %[[TMP_13:.*]] = memref.load %[[TMP_4]][%[[TMP_arg3]]] : memref<?xindex>
 //       CHECK:        %[[TMP_14:.*]] = memref.load %[[TMP_5]][%[[TMP_arg3]]] : memref<?xi32>
-//       CHECK:        %[[Y:.*]] = sparse_tensor.insert %[[TMP_14]] into %{{.*}}[%[[TMP_9]], %[[TMP_arg2]], %[[TMP_13]]]
+//       CHECK:        %[[Y:.*]] = tensor.insert %[[TMP_14]] into %{{.*}}[%[[TMP_9]], %[[TMP_arg2]], %[[TMP_13]]]
 //       CHECK:        scf.yield %[[Y]]
 //       CHECK:      }
 //       CHECK:      scf.yield %[[L2]]
diff --git a/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir b/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir
index 6076c1fbe76f21..bf3473ead204eb 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir
@@ -41,7 +41,7 @@
 // CHECK:                 scf.yield
 // CHECK:               }
 // CHECK:               scf.if {{.*}} {
-// CHECK:                 sparse_tensor.insert %{{.*}} into %{{.*}}{{\[}}%[[D0]], %[[D1]]]
+// CHECK:                 tensor.insert %{{.*}} into %{{.*}}{{\[}}%[[D0]], %[[D1]]]
 // CHECK:                 scf.yield
 // CHECK:               } else {
 // CHECK:                 scf.yield
diff --git a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir
index 8c09523174bd76..07b2c3c22995d1 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir
@@ -371,7 +371,7 @@ func.func @divbyc(%arga: tensor<32xf64, #SV>,
 // CHECK:             %[[VAL_17:.*]] = math.log1p %[[VAL_16]] : f64
 // CHECK:             %[[VAL_18:.*]] = math.sin %[[VAL_17]] : f64
 // CHECK:             %[[VAL_19:.*]] = math.tanh %[[VAL_18]] : f64
-// CHECK:             %[[Y:.*]] = sparse_tensor.insert %[[VAL_19]] into %{{.*}}[%[[VAL_10]]] : tensor<32xf64, #sparse{{[0-9]*}}>
+// CHECK:             %[[Y:.*]] = tensor.insert %[[VAL_19]] into %{{.*}}[%[[VAL_10]]] : tensor<32xf64, #sparse{{[0-9]*}}>
 // CHECK:             scf.yield %[[Y]]
 // CHECK:           }
 // CHECK:           %[[VAL_20:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<32xf64, #sparse{{[0-9]*}}>
@@ -412,7 +412,7 @@ func.func @zero_preserving_math(%arga: tensor<32xf64, #SV>) -> tensor<32xf64, #S
 // CHECK:             %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xindex>
 // CHECK:             %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<?xcomplex<f64>>
 // CHECK:             %[[VAL_13:.*]] = complex.div %[[VAL_12]], %[[VAL_3]] : complex<f64>
-// CHECK:             %[[Y:.*]] = sparse_tensor.insert %[[VAL_13]] into %{{.*}}[%[[VAL_11]]] : tensor<32xcomplex<f64>, #sparse{{[0-9]*}}>
+// CHECK:             %[[Y:.*]] = tensor.insert %[[VAL_13]] into %{{.*}}[%[[VAL_11]]] : tensor<32xcomplex<f64>, #sparse{{[0-9]*}}>
 // CHECK:             scf.yield %[[Y]]
 // CHECK:           }
 // CHECK:           %[[VAL_14:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<32xcomplex<f64>, #sparse{{[0-9]*}}>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_index.mlir
index 3e8b485f63df97..7fe662893f4b18 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_index.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_index.mlir
@@ -95,7 +95,7 @@ func.func @dense_index(%arga: tensor<?x?xi64, #DenseMatrix>)
 // CHECK:               %[[VAL_22:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref<?xi64>
 // CHECK:               %[[VAL_23:.*]] = arith.muli %[[VAL_21]], %[[VAL_22]] : i64
 // CHECK:               %[[VAL_24:.*]] = arith.muli %[[VAL_20]], %[[VAL_23]] : i64
-// CHECK:               %[[Y:.*]] = sparse_tensor.insert %[[VAL_24]] into %{{.*}}[%[[VAL_14]], %[[VAL_19]]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
+// CHECK:               %[[Y:.*]] = tensor.insert %[[VAL_24]] into %{{.*}}[%[[VAL_14]], %[[VAL_19]]] : tensor<?x?xi64, #sparse{{[0-9]*}}>
 // CHECK:               scf.yield %[[Y]]
 // CHECK:             }
 // CHECK:             scf.yield %[[L]]
diff --git a/mlir/test/Dialect/SparseTensor/sparse_out.mlir b/mlir/test/Dialect/SparseTensor/sparse_out.mlir
index b1795ff2e1a226..08b81b54a9e63c 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_out.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_out.mlir
@@ -114,7 +114,7 @@ func.func @sparse_simply_dynamic2(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x
 // CHECK:               %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref<?xindex>
 // CHECK:               %[[VAL_19:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref<?xf32>
 // CHECK:               %[[VAL_20:.*]] = arith.mulf %[[VAL_19]], %[[VAL_4]] : f32
-// CHECK:               %[[VAL_21:.*]] = sparse_tensor.insert %[[VAL_20]] into %[[VAL_17]]{{\[}}%[[VAL_10]], %[[VAL_18]]] : tensor<10x20xf32, #sparse{{[0-9]*}}>
+// CHECK:               %[[VAL_21:.*]] = tensor.insert %[[VAL_20]] into %[[VAL_17]]{{\[}}%[[VAL_10]], %[[VAL_18]]] : tensor<10x20xf32, #sparse{{[0-9]*}}>
 // CHECK:               scf.yield %[[VAL_21]] : tensor<10x20xf32, #sparse{{[0-9]*}}>
 // CHECK:             }
 // CHECK:             scf.yield %[[VAL_22:.*]] : tensor<10x20xf32, #sparse{{[0-9]*}}>
@@ -248,7 +248,7 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x
 // CHECK:                     scf.yield %[[VAL_100]], %[[VAL_103]], %[[VAL_104:.*]]#0, %[[VAL_104]]#1, %[[VAL_104]]#2 : index, index, i32, i1, tensor<?x?xi32, #sparse{{[0-9]*}}>
 // CHECK:                   }
 // CHECK:                   %[[VAL_202:.*]] = scf.if %[[VAL_74]]#3 -> (tensor<?x?xi32, #sparse{{[0-9]*}}>) {
-// CHECK:                     %[[VAL_105:.*]] = sparse_tensor.insert %[[VAL_74]]#2 into %[[VAL_74]]#4{{\[}}%[[VAL_39]], %[[VAL_63]]] : tensor<?x?xi32, #sparse{{[0-9]*}}>
+// CHECK:                     %[[VAL_105:.*]] = tensor.insert %[[VAL_74]]#2 into %[[VAL_74]]#4{{\[}}%[[VAL_39]], %[[VAL_63]]] : tensor<?x?xi32, #sparse{{[0-9]*}}>
 // CHECK:                     scf.yield %[[VAL_105]] : tensor<?x?xi32, #sparse{{[0-9]*}}>
 // CHECK:                   } else {
 // CHECK:                     scf.yield %[[VAL_74]]#4 : tensor<?x?xi32, #sparse{{[0-9]*}}>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir b/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir
index aa17261724dbc0..97c668716eecc3 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir
@@ -63,7 +63,7 @@ func.func @mul(%arg0: tensor<32x32xf32>,
 // CHECK:           %[[VAL_2:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]] : tensor<2x4xf64, #[[$remap]]> to tensor<1x2x2x2xf64, #[[$demap]]>
 // CHECK:           %[[VAL_4:.*]] = sparse_tensor.foreach in %[[VAL_2]] init(%[[VAL_1]])
 // CHECK:           ^bb0(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: index, %[[VAL_7:.*]]: index, %[[VAL_8:.*]]: index, %[[VAL_9:.*]]: f64, %[[VAL_10:.*]]: tensor<1x2x2x2xf64, #[[$demap]]>
-// CHECK:             %[[VAL_11:.*]] = sparse_tensor.insert %[[VAL_9]] into %[[VAL_10]]{{\[}}%[[VAL_5]], %[[VAL_6]], %[[VAL_7]], %[[VAL_8]]] : tensor<1x2x2x2xf64, #[[$demap]]>
+// CHECK:             %[[VAL_11:.*]] = tensor.insert %[[VAL_9]] into %[[VAL_10]]{{\[}}%[[VAL_5]], %[[VAL_6]], %[[VAL_7]], %[[VAL_8]]] : tensor<1x2x2x2xf64, #[[$demap]]>
 // CHECK:             sparse_tensor.yield %[[VAL_11]] : tensor<1x2x2x2xf64, #sparse{{[0-9]*}}>
 // CHECK:           }
 // CHECK:           %[[VAL_12:.*]] = sparse_tensor.reinterpret_map %[[VAL_4]] : tensor<1x2x2x2xf64, #[[$demap]]> to tensor<2x4xf64, #[[$remap]]>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir
index eea77c6e5a6cc6..edb53fa024c26b 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir
@@ -31,7 +31,7 @@
 // CHECK:           %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref<?xf64>
 // CHECK:           %[[DI0:.*]] = arith.divui %[[SI]], %[[C10]] : index
 // CHECK:           %[[DI1:.*]] = arith.remui %[[SI]], %[[C10]] : index
-// CHECK:           %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]]
+// CHECK:           %[[NT:.*]] = tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]]
 // CHECK:           scf.yield %[[NT:.*]]
 // CHECK:         }
 // CHECK:         %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
@@ -75,7 +75,7 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x
 // CHECK:             %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref<?xf64>
 // CHECK:             %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index
 // CHECK:             %[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index
-// CHECK:             %[[R1:.*]] = sparse_tensor.insert %[[SV]] into %[[A1]]{{\[}}%[[DI]]]
+// CHECK:             %[[R1:.*]] = tensor.insert %[[SV]] into %[[A1]]{{\[}}%[[DI]]]
 // CHECK              scf.yield %[[R1]]
 // CHECK            }
 // CHECK            scf.yield %[[RET_1]]
@@ -120,7 +120,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10
 // CHECK:           %[[T3:.*]] = arith.remui %[[SI]], %[[T2]] : index
 // CHECK:           %[[T4:.*]] = arith.divui %[[T2]], %[[C10]] : index
 // CHECK:           %[[DI1:.*]] = arith.divui %[[T3]], %[[T4]] : index
-// CHECK:           %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]]
+// CHECK:           %[[NT:.*]] = tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]]
 // CHECK:           scf.yield %[[NT]]
 // CHECK:         }
 // CHECK:         %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts
@@ -169,7 +169,7 @@ func.func @dynamic_sparse_expand(%arg0: tensor<?xf64, #SparseVector>) -> tensor<
 // CHECK:             %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index
 // CHECK:             %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index
 // CHECK:             %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index
-// CHECK:             %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R1]]{{\[}}%[[DI]]]
+// CHECK:             %[[NT:.*]] = tensor.insert %[[SV]] into %[[R1]]{{\[}}%[[DI]]]
 // CHECK              scf.yield %[[NT]]
 // CHECK            }
 // CHECK            scf.yield %[[RET_1]]
diff --git a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir
index 47d24ebf24fc6a..89826ebfe14d1a 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir
@@ -29,7 +29,7 @@
 // CHECK:             %[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index
 // CHECK:             %[[D:.*]] = arith.divui %[[DI]], %[[C10]] : index
 // CHECK:             %[[R:.*]] = arith.remui %[[DI]], %[[C10]] : index
-// CHECK:             %[[R1:.*]] = sparse_tensor.insert %[[SV]] into %[[A1]]{{\[}}%[[D]], %[[R]]]
+// CHECK:             %[[R1:.*]] = tensor.insert %[[SV]] into %[[A1]]{{\[}}%[[D]], %[[R]]]
 // CHECK:              scf.yield %[[R1]]
 // CHECK:            }
 // CHECK:            scf.yield %[[RET_1]]
diff --git a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
index 80a989d789df21..5038e977688dc9 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
@@ -37,7 +37,7 @@
 // CHECK:             %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_16]] to %[[VAL_18]] step %[[VAL_2]] iter_args(%[[VAL_21:.*]] = %[[VAL_14]]) -> (tensor<4x3xf64, #sparse{{[0-9]*}}>) {
 // CHECK:               %[[VAL_22:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref<?xindex>
 // CHECK:               %[[VAL_23:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref<?xf64>
-// CHECK:               %[[VAL_24:.*]] = sparse_tensor.insert %[[VAL_23]] into %[[VAL_21]]{{\[}}%[[VAL_15]], %[[VAL_22]]] : tensor<4x3xf64, #sparse{{[0-9]*}}>
+// CHECK:               %[[VAL_24:.*]] = tensor.insert %[[VAL_23]] into %[[VAL_21]]{{\[}}%[[VAL_15]], %[[VAL_22]]] : tensor<4x3xf64, #sparse{{[0-9]*}}>
 // CHECK:               scf.yield %[[VAL_24]] : tensor<4x3xf64, #sparse{{[0-9]*}}>
 // CHECK:             }
 // CHECK:             scf.yield %[[VAL_25:.*]] : tensor<4x3xf64, #sparse{{[0-9]*}}>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_1d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_1d.mlir
index 61c68507ea5198..12e0d2267a265b 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_1d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_1d.mlir
@@ -54,10 +54,10 @@ module {
 
     // Build the sparse vector from straightline code.
     %0 = tensor.empty() : tensor<1024xf32, #SparseVector>
-    %1 = sparse_tensor.insert %f1 into %0[%c0] : tensor<1024xf32, #SparseVector>
-    %2 = sparse_tensor.insert %f2 into %1[%c1] : tensor<1024xf32, #SparseVector>
-    %3 = sparse_tensor.insert %f3 into %2[%c3] : tensor<1024xf32, #SparseVector>
-    %4 = sparse_tensor.insert %f4 into %3[%c1023] : tensor<1024xf32, #SparseVector>
+    %1 = tensor.insert %f1 into %0[%c0] : tensor<1024xf32, #SparseVector>
+    %2 = tensor.insert %f2 into %1[%c1] : tensor<1024xf32, #SparseVector>
+    %3 = tensor.insert %f3 into %2[%c3] : tensor<1024xf32, #SparseVector>
+    %4 = tensor.insert %f4 into %3[%c1023] : tensor<1024xf32, #SparseVector>
     %5 = sparse_tensor.load %4 hasInserts : tensor<1024xf32, #SparseVector>
 
     //
@@ -76,7 +76,7 @@ module {
     %6 = tensor.empty() : tensor<1024xf32, #SparseVector>
     %7 = scf.for %i = %c0 to %c8 step %c1 iter_args(%vin = %6) -> tensor<1024xf32, #SparseVector> {
       %ii = arith.muli %i, %c3 : index
-      %vout = sparse_tensor.insert %f1 into %vin[%ii] : tensor<1024xf32, #SparseVector>
+      %vout = tensor.insert %f1 into %vin[%ii] : tensor<1024xf32, #SparseVector>
       scf.yield %vout : tensor<1024xf32, #SparseVector>
     }
     %8 = sparse_tensor.load %7 hasInserts : tensor<1024xf32, #SparseVector>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_2d.mlir
index d51b67792337d1..8831091506533f 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_2d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_2d.mlir
@@ -72,10 +72,10 @@ module {
     // CHECK-NEXT: ----
     //
     %densea = tensor.empty() : tensor<4x3xf64, #Dense>
-    %dense1 = sparse_tensor.insert %f1 into %densea[%c0, %c0] : tensor<4x3xf64, #Dense>
-    %dense2 = sparse_tensor.insert %f2 into %dense1[%c2, %c2] : tensor<4x3xf64, #Dense>
-    %dense3 = sparse_tensor.insert %f3 into %dense2[%c3, %c0] : tensor<4x3xf64, #Dense>
-    %dense4 = sparse_tensor.insert %f4 into %dense3[%c3, %c2] : tensor<4x3xf64, #Dense>
+    %dense1 = tensor.insert %f1 into %densea[%c0, %c0] : tensor<4x3xf64, #Dense>
+    %dense2 = tensor.insert %f2 into %dense1[%c2, %c2] : tensor<4x3xf64, #Dense>
+    %dense3 = tensor.insert %f3 into %dense2[%c3, %c0] : tensor<4x3xf64, #Dense>
+    %dense4 = tensor.insert %f4 into %dense3[%c3, %c2] : tensor<4x3xf64, #Dense>
     %densem = sparse_tensor.load %dense4 hasInserts : tensor<4x3xf64, #Dense>
     sparse_tensor.print %densem : tensor<4x3xf64, #Dense>
 
@@ -93,10 +93,10 @@ module {
     // CHECK-NEXT: ----
     //
     %cooa = tensor.empty() : tensor<4x3xf64, #SortedCOO>
-    %coo1 = sparse_tensor.insert %f1 into %cooa[%c0, %c0] : tensor<4x3xf64, #SortedCOO>
-    %coo2 = sparse_tensor.insert %f2 into %coo1[%c2, %c2] : tensor<4x3xf64, #SortedCOO>
-    %coo3 = sparse_tensor.insert %f3 into %coo2[%c3, %c0] : tensor<4x3xf64, #SortedCOO>
-    %coo4 = sparse_tensor.insert %f4 into %coo3[%c3, %c2] : tensor<4x3xf64, #SortedCOO>
+    %coo1 = tensor.insert %f1 into %cooa[%c0, %c0] : tensor<4x3xf64, #SortedCOO>
+    %coo2 = tensor.insert %f2 into %coo1[%c2, %c2] : tensor<4x3xf64, #SortedCOO>
+    %coo3 = tensor.insert %f3 into %coo2[%c3, %c0] : tensor<4x3xf64, #SortedCOO>
+    %coo4 = tensor.insert %f4 into %coo3[%c3, %c2] : tensor<4x3xf64, #SortedCOO>
     %coom = sparse_tensor.load %coo4 hasInserts : tensor<4x3xf64, #SortedCOO>
     sparse_tensor.print %coom : tensor<4x3xf64, #SortedCOO>
 
@@ -113,10 +113,10 @@ module {
     // CHECK-NEXT: ----
     //
     %csra = tensor.empty() : tensor<4x3xf64, #CSR>
-    %csr1 = sparse_tensor.insert %f1 into %csra[%c0, %c0] : tensor<4x3xf64, #CSR>
-    %csr2 = sparse_tensor.insert %f2 into %csr1[%c2, %c2] : tensor<4x3xf64, #CSR>
-    %csr3 = sparse_tensor.insert %f3 into %csr2[%c3, %c0] : tensor<4x3xf64, #CSR>
-    %csr4 = sparse_tensor.insert %f4 into %csr3[%c3, %c2] : tensor<4x3xf64, #CSR>
+    %csr1 = tensor.insert %f1 into %csra[%c0, %c0] : tensor<4x3xf64, #CSR>
+    %csr2 = tensor.insert %f2 into %csr1[%c2, %c2] : tensor<4x3xf64, #CSR>
+    %csr3 = tensor.insert %f3 into %csr2[%c3, %c0] : tensor<4x3xf64, #CSR>
+    %csr4 = tensor.insert %f4 into %csr3[%c3, %c2] : tensor<4x3xf64, #CSR>
     %csrm = sparse_tensor.load %csr4 hasInserts : tensor<4x3xf64, #CSR>
     sparse_tensor.print %csrm : tensor<4x3xf64, #CSR>
 
@@ -135,10 +135,10 @@ module {
     // CHECK-NEXT: ----
     //
     %dcsra = tensor.empty() : tensor<4x3xf64, #DCSR>
-    %dcsr1 = sparse_tensor.insert %f1 into %dcsra[%c0, %c0] : tensor<4x3xf64, #DCSR>
-    %dcsr2 = sparse_tensor.insert %f2 into %dcsr1[%c2, %c2] : tensor<4x3xf64, #DCSR>
-    %dcsr3 = sparse_tensor.insert %f3 into %dcsr2[%c3, %c0] : tensor<4x3xf64, #DCSR>
-    %dcsr4 = sparse_tensor.insert %f4 into %dcsr3[%c3, %c2] : tensor<4x3xf64, #DCSR>
+    %dcsr1 = tensor.insert %f1 into %dcsra[%c0, %c0] : tensor<4x3xf64, #DCSR>
+    %dcsr2 = tensor.insert %f2 into %dcsr1[%c2, %c2] : tensor<4x3xf64, #DCSR>
+    %dcsr3 = tensor.insert %f3 into %dcsr2[%c3, %c0] : tensor<4x3xf64, #DCSR>
+    %dcsr4 = tensor.insert %f4 into %dcsr3[%c3, %c2] : tensor<4x3xf64, #DCSR>
     %dcsrm = sparse_tensor.load %dcsr4 hasInserts : tensor<4x3xf64, #DCSR>
     sparse_tensor.print %dcsrm : tensor<4x3xf64, #DCSR>
 
@@ -155,10 +155,10 @@ module {
     // CHECK-NEXT: ----
     //
     %rowa = tensor.empty() : tensor<4x3xf64, #Row>
-    %row1 = sparse_tensor.insert %f1 into %rowa[%c0, %c0] : tensor<4x3xf64, #Row>
-    %row2 = sparse_tensor.insert %f2 into %row1[%c2, %c2] : tensor<4x3xf64, #Row>
-    %row3 = sparse_tensor.insert %f3 into %row2[%c3, %c0] : tensor<4x3xf64, #Row>
-    %row4 = sparse_tensor.insert %f4 into %row3[%c3, %c2] : tensor<4x3xf64, #Row>
+    %row1 = tensor.insert %f1 into %rowa[%c0, %c0] : tensor<4x3xf64, #Row>
+    %row2 = tensor.insert %f2 into %row1[%c2, %c2] : tensor<4x3xf64, #Row>
+    %row3 = tensor.insert %f3 into %row2[%c3, %c0] : tensor<4x3xf64, #Row>
+    %row4 = tensor.insert %f4 into %row3[%c3, %c2] : tensor<4x3xf64, #Row>
     %rowm = sparse_tensor.load %row4 hasInserts : tensor<4x3xf64, #Row>
     sparse_tensor.print %rowm : tensor<4x3xf64, #Row>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_3d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_3d.mlir
index 1917fd987c5d7d..364a188cf37c4c 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_3d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_3d.mlir
@@ -71,11 +71,11 @@ module {
     // CHECK-NEXT: values : ( 1.1, 2.2, 3.3, 4.4, 5.5
     // CHECK-NEXT: ----
     %tensora = tensor.empty() : tensor<5x4x3xf64, #TensorCSR>
-    %tensor1 = sparse_tensor.insert %f1 into %tensora[%c3, %c0, %c1] : tensor<5x4x3xf64, #TensorCSR>
-    %tensor2 = sparse_tensor.insert %f2 into %tensor1[%c3, %c0, %c2] : tensor<5x4x3xf64, #TensorCSR>
-    %tensor3 = sparse_tensor.insert %f3 into %tensor2[%c3, %c3, %c1] : tensor<5x4x3xf64, #TensorCSR>
-    %tensor4 = sparse_tensor.insert %f4 into %tensor3[%c4, %c2, %c2] : tensor<5x4x3xf64, #TensorCSR>
-    %tensor5 = sparse_tensor.insert %f5 into %tensor4[%c4, %c3, %c2] : tensor<5x4x3xf64, #TensorCSR>
+    %tensor1 = tensor.insert %f1 into %tensora[%c3, %c0, %c1] : tensor<5x4x3xf64, #TensorCSR>
+    %tensor2 = tensor.insert %f2 into %tensor1[%c3, %c0, %c2] : tensor<5x4x3xf64, #TensorCSR>
+    %tensor3 = tensor.insert %f3 into %tensor2[%c3, %c3, %c1] : tensor<5x4x3xf64, #TensorCSR>
+    %tensor4 = tensor.insert %f4 into %tensor3[%c4, %c2, %c2] : tensor<5x4x3xf64, #TensorCSR>
+    %tensor5 = tensor.insert %f5 into %tensor4[%c4, %c3, %c2] : tensor<5x4x3xf64, #TensorCSR>
     %tensorm = sparse_tensor.load %tensor5 hasInserts : tensor<5x4x3xf64, #TensorCSR>
     sparse_tensor.print %tensorm : tensor<5x4x3xf64, #TensorCSR>
 
@@ -90,11 +90,11 @@ module {
     // CHECK-NEXT: values : ( 0, 1.1, 2.2, 0, 3.3, 0, 0, 0, 4.4, 0, 0, 5.5
     // CHECK-NEXT: ----
     %rowa = tensor.empty() : tensor<5x4x3xf64, #TensorRow>
-    %row1 = sparse_tensor.insert %f1 into %rowa[%c3, %c0, %c1] : tensor<5x4x3xf64, #TensorRow>
-    %row2 = sparse_tensor.insert %f2 into %row1[%c3, %c0, %c2] : tensor<5x4x3xf64, #TensorRow>
-    %row3 = sparse_tensor.insert %f3 into %row2[%c3, %c3, %c1] : tensor<5x4x3xf64, #TensorRow>
-    %row4 = sparse_tensor.insert %f4 into %row3[%c4, %c2, %c2] : tensor<5x4x3xf64, #TensorRow>
-    %row5 = sparse_tensor.insert %f5 into %row4[%c4, %c3, %c2] : tensor<5x4x3xf64, #TensorRow>
+    %row1 = tensor.insert %f1 into %rowa[%c3, %c0, %c1] : tensor<5x4x3xf64, #TensorRow>
+    %row2 = tensor.insert %f2 into %row1[%c3, %c0, %c2] : tensor<5x4x3xf64, #TensorRow>
+    %row3 = tensor.insert %f3 into %row2[%c3, %c3, %c1] : tensor<5x4x3xf64, #TensorRow>
+    %row4 = tensor.insert %f4 into %row3[%c4, %c2, %c2] : tensor<5x4x3xf64, #TensorRow>
+    %row5 = tensor.insert %f5 into %row4[%c4, %c3, %c2] : tensor<5x4x3xf64, #TensorRow>
     %rowm = sparse_tensor.load %row5 hasInserts : tensor<5x4x3xf64, #TensorRow>
     sparse_tensor.print %rowm : tensor<5x4x3xf64, #TensorRow>
 
@@ -109,11 +109,11 @@ module {
     // CHECK-NEXT: values : ( 1.1, 2.2, 3.3, 4.4, 5.5
     // CHECK-NEXT: ----
     %ccoo = tensor.empty() : tensor<5x4x3xf64, #CCoo>
-    %ccoo1 = sparse_tensor.insert %f1 into %ccoo[%c3, %c0, %c1] : tensor<5x4x3xf64, #CCoo>
-    %ccoo2 = sparse_tensor.insert %f2 into %ccoo1[%c3, %c0, %c2] : tensor<5x4x3xf64, #CCoo>
-    %ccoo3 = sparse_tensor.insert %f3 into %ccoo2[%c3, %c3, %c1] : tensor<5x4x3xf64, #CCoo>
-    %ccoo4 = sparse_tensor.insert %f4 into %ccoo3[%c4, %c2, %c2] : tensor<5x4x3xf64, #CCoo>
-    %ccoo5 = sparse_tensor.insert %f5 into %ccoo4[%c4, %c3, %c2] : tensor<5x4x3xf64, #CCoo>
+    %ccoo1 = tensor.insert %f1 into %ccoo[%c3, %c0, %c1] : tensor<5x4x3xf64, #CCoo>
+    %ccoo2 = tensor.insert %f2 into %ccoo1[%c3, %c0, %c2] : tensor<5x4x3xf64, #CCoo>
+    %ccoo3 = tensor.insert %f3 into %ccoo2[%c3, %c3, %c1] : tensor<5x4x3xf64, #CCoo>
+    %ccoo4 = tensor.insert %f4 into %ccoo3[%c4, %c2, %c2] : tensor<5x4x3xf64, #CCoo>
+    %ccoo5 = tensor.insert %f5 into %ccoo4[%c4, %c3, %c2] : tensor<5x4x3xf64, #CCoo>
     %ccoom = sparse_tensor.load %ccoo5 hasInserts : tensor<5x4x3xf64, #CCoo>
     sparse_tensor.print %ccoom : tensor<5x4x3xf64, #CCoo>
 
@@ -126,11 +126,11 @@ module {
     // CHECK-NEXT: values : ( 1.1, 2.2, 3.3, 4.4, 5.5
     // CHECK-NEXT: ----
     %dcoo = tensor.empty() : tensor<5x4x3xf64, #DCoo>
-    %dcoo1 = sparse_tensor.insert %f1 into %dcoo[%c3, %c0, %c1] : tensor<5x4x3xf64, #DCoo>
-    %dcoo2 = sparse_tensor.insert %f2 into %dcoo1[%c3, %c0, %c2] : tensor<5x4x3xf64, #DCoo>
-    %dcoo3 = sparse_tensor.insert %f3 into %dcoo2[%c3, %c3, %c1] : tensor<5x4x3xf64, #DCoo>
-    %dcoo4 = sparse_tensor.insert %f4 into %dcoo3[%c4, %c2, %c2] : tensor<5x4x3xf64, #DCoo>
-    %dcoo5 = sparse_tensor.insert %f5 into %dcoo4[%c4, %c3, %c2] : tensor<5x4x3xf64, #DCoo>
+    %dcoo1 = tensor.insert %f1 into %dcoo[%c3, %c0, %c1] : tensor<5x4x3xf64, #DCoo>
+    %dcoo2 = tensor.insert %f2 into %dcoo1[%c3, %c0, %c2] : tensor<5x4x3xf64, #DCoo>
+    %dcoo3 = tensor.insert %f3 into %dcoo2[%c3, %c3, %c1] : tensor<5x4x3xf64, #DCoo>
+    %dcoo4 = tensor.insert %f4 into %dcoo3[%c4, %c2, %c2] : tensor<5x4x3xf64, #DCoo>
+    %dcoo5 = tensor.insert %f5 into %dcoo4[%c4, %c3, %c2] : tensor<5x4x3xf64, #DCoo>
     %dcoom = sparse_tensor.load %dcoo5 hasInserts : tensor<5x4x3xf64, #DCoo>
     sparse_tensor.print %dcoom : tensor<5x4x3xf64, #DCoo>
 

>From 64111831ed46b9e82b8626356c087ce2202f029b Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Tue, 12 Mar 2024 17:00:51 -0700
Subject: [PATCH 02/10] Relax test to work with newer versions of lldb

---
 .../debuginfo-tests/llgdb-tests/forward-declare-class.cpp    | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cross-project-tests/debuginfo-tests/llgdb-tests/forward-declare-class.cpp b/cross-project-tests/debuginfo-tests/llgdb-tests/forward-declare-class.cpp
index 132420009bd1df..850eed6ad95d98 100644
--- a/cross-project-tests/debuginfo-tests/llgdb-tests/forward-declare-class.cpp
+++ b/cross-project-tests/debuginfo-tests/llgdb-tests/forward-declare-class.cpp
@@ -6,9 +6,8 @@
 // Work around a gdb bug where it believes that a class is a
 // struct if there aren't any methods - even though it's tagged
 // as a class.
-// CHECK: type = {{struct|class}} A {
-// CHECK-NEXT: {{(public:){0,1}}}
-// CHECK-NEXT: int MyData;
+// CHECK: {{struct|class}} A {
+// CHECK:        int MyData;
 // CHECK-NEXT: }
 class A;
 class B {

>From bb5921e2a2da4a87016e623deb8c2eaed7f1f5a8 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek at google.com>
Date: Tue, 12 Mar 2024 17:01:29 -0700
Subject: [PATCH 03/10] [libc] Include FP_* macros in math.h (#84996)

These are used unconditionally by libc++ math.h.

This is related to issue #84879.
---
 libc/include/llvm-libc-macros/math-macros.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libc/include/llvm-libc-macros/math-macros.h b/libc/include/llvm-libc-macros/math-macros.h
index e67fe4d11b4493..db8a4ea65bd69a 100644
--- a/libc/include/llvm-libc-macros/math-macros.h
+++ b/libc/include/llvm-libc-macros/math-macros.h
@@ -11,6 +11,12 @@
 
 #include "limits-macros.h"
 
+#define FP_NAN 0
+#define FP_INFINITE 1
+#define FP_ZERO 2
+#define FP_SUBNORMAL 3
+#define FP_NORMAL 4
+
 #define MATH_ERRNO 1
 #define MATH_ERREXCEPT 2
 

>From accfbf4e4959957db0993a15bab7316001131df3 Mon Sep 17 00:00:00 2001
From: Zahi Moudallal <128723247+zahimoud at users.noreply.github.com>
Date: Tue, 12 Mar 2024 17:07:16 -0700
Subject: [PATCH 04/10] [MLIR][ROCDL] Add BallotOp and lit test (#84856)

---
 mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 10 ++++++++++
 mlir/test/Target/LLVMIR/rocdl.mlir           |  7 +++++++
 2 files changed, 17 insertions(+)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 32b5a1c016b6f8..abb38a3df8068c 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -158,6 +158,16 @@ Arguments<(ins I32:$index,
    }];
 }
 
+def ROCDL_BallotOp :
+  ROCDL_Op<"ballot">,
+  Results<(outs LLVM_Type:$res)>,
+  Arguments<(ins I1:$pred)> {
+  string llvmBuilder = [{
+      $res = createIntrinsicCall(builder,
+            llvm::Intrinsic::amdgcn_ballot, {$pred}, {llvm::Type::getInt32Ty(moduleTranslation.getLLVMContext())});
+  }];
+  let assemblyFormat = "$pred attr-dict `:` type($res)";
+}
 
 //===----------------------------------------------------------------------===//
 // Thread index and Block index
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index d35acb0475e6f8..93550f5c7bd5a4 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -88,6 +88,13 @@ llvm.func @rocdl.bpermute(%src : i32) -> i32 {
   llvm.return %0 : i32
 }
 
+llvm.func @rocdl.ballot(%pred : i1) -> i32 {
+  // CHECK-LABEL: rocdl.ballot
+  // CHECK: call i32 @llvm.amdgcn.ballot
+  %0 = rocdl.ballot %pred : i32
+  llvm.return %0 : i32
+}
+
 llvm.func @rocdl.waitcnt() {
   // CHECK-LABEL: rocdl.waitcnt
   // CHECK-NEXT: call void @llvm.amdgcn.s.waitcnt(i32 0)

>From e2468bf16a0c1f63a39aa417c15c03ebd77fab9e Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda at apple.com>
Date: Tue, 12 Mar 2024 17:19:46 -0700
Subject: [PATCH 05/10] [lldb][debugserver] Update flags past to app launch
 request

rdar://117421999
---
 lldb/tools/debugserver/source/MacOSX/MachProcess.mm | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
index 87bdbf835bfd10..70b4564a027b1b 100644
--- a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
+++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm
@@ -472,6 +472,8 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options,
   // And there are some other options at the top level in this dictionary:
   [options setObject:[NSNumber numberWithBool:YES]
               forKey:FBSOpenApplicationOptionKeyUnlockDevice];
+  [options setObject:[NSNumber numberWithBool:YES]
+              forKey:FBSOpenApplicationOptionKeyPromptUnlockDevice];
 
   // We have to get the "sequence ID & UUID" for this app bundle path and send
   // them to FBS:

>From 2f400a2fd77b44d34281792aca84c42e149095e7 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 12 Mar 2024 20:22:49 -0400
Subject: [PATCH 06/10] [GISEL] Add G_VSCALE instruction (#84542)

---
 llvm/docs/GlobalISel/GenericOpcode.rst        | 11 ++++++++++
 .../CodeGen/GlobalISel/MachineIRBuilder.h     | 22 +++++++++++++++++++
 llvm/include/llvm/Support/TargetOpcodes.def   |  3 +++
 llvm/include/llvm/Target/GenericOpcodes.td    |  9 ++++++++
 .../CodeGen/GlobalISel/MachineIRBuilder.cpp   | 18 +++++++++++++++
 llvm/lib/CodeGen/MachineVerifier.cpp          | 11 ++++++++++
 .../GlobalISel/legalizer-info-validation.mir  |  3 +++
 llvm/test/MachineVerifier/test_g_vscale.mir   | 15 +++++++++++++
 8 files changed, 92 insertions(+)
 create mode 100644 llvm/test/MachineVerifier/test_g_vscale.mir

diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index f9f9e1186460ee..bf1b3cb30a52e5 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -607,6 +607,17 @@ See the LLVM LangRef entry on '``llvm.lround.*'`` for details on behaviour.
 Vector Specific Operations
 --------------------------
 
+G_VSCALE
+^^^^^^^^
+
+Puts the value of the runtime ``vscale`` multiplied by the value in the source
+operand into the destination register. This can be useful in determining the
+actual runtime number of elements in a vector.
+
+.. code-block::
+
+  %0:_(s32) = G_VSCALE 4
+
 G_INSERT_SUBVECTOR
 ^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 4732eaf4ee27c5..aaa81342845bff 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1143,6 +1143,28 @@ class MachineIRBuilder {
   MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src,
                                   const SrcOp &Op, unsigned Index);
 
+  /// Build and insert \p Res = G_VSCALE \p MinElts
+  ///
+  /// G_VSCALE puts the value of the runtime vscale multiplied by \p MinElts
+  /// into \p Res.
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p Res must be a generic virtual register with scalar type.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts);
+
+  /// Build and insert \p Res = G_VSCALE \p MinElts
+  ///
+  /// G_VSCALE puts the value of the runtime vscale multiplied by \p MinElts
+  /// into \p Res.
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p Res must be a generic virtual register with scalar type.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildVScale(const DstOp &Res, const ConstantInt &MinElts);
+
   /// Build and insert a G_INTRINSIC instruction.
   ///
   /// There are four different opcodes based on combinations of whether the
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 3dade14f043b60..899eaad5842ae0 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -727,6 +727,9 @@ HANDLE_TARGET_OPCODE(G_BR)
 /// Generic branch to jump table entry.
 HANDLE_TARGET_OPCODE(G_BRJT)
 
+/// Generic vscale.
+HANDLE_TARGET_OPCODE(G_VSCALE)
+
 /// Generic insert subvector.
 HANDLE_TARGET_OPCODE(G_INSERT_SUBVECTOR)
 
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 8dc84fb0ba0524..67d405ba96fa10 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1289,6 +1289,15 @@ def G_MERGE_VALUES : GenericInstruction {
   let variadicOpsType = type1;
 }
 
+// Generic vscale.
+// Puts the value of the runtime vscale multiplied by the value in the source
+// operand into the destination register.
+def G_VSCALE : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins unknown:$src);
+  let hasSideEffects = false;
+}
+
 /// Create a vector from multiple scalar registers. No implicit
 /// conversion is performed (i.e. the result element type must be the
 /// same as all source operands)
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 9b12d443c96e98..f7aaa0f02efcb3 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -793,6 +793,24 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res,
   return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)});
 }
 
+MachineInstrBuilder MachineIRBuilder::buildVScale(const DstOp &Res,
+                                                  unsigned MinElts) {
+
+  auto IntN = IntegerType::get(getMF().getFunction().getContext(),
+                               Res.getLLTTy(*getMRI()).getScalarSizeInBits());
+  ConstantInt *CI = ConstantInt::get(IntN, MinElts);
+  return buildVScale(Res, *CI);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildVScale(const DstOp &Res,
+                                                  const ConstantInt &MinElts) {
+  auto VScale = buildInstr(TargetOpcode::G_VSCALE);
+  VScale->setDebugLoc(DebugLoc());
+  Res.addDefToMIB(*getMRI(), VScale);
+  VScale.addCImm(&MinElts);
+  return VScale;
+}
+
 static unsigned getIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) {
   if (HasSideEffects && IsConvergent)
     return TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS;
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 90cbf097370de2..c2d6dd35e1cb21 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1613,6 +1613,17 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
       report("G_BSWAP size must be a multiple of 16 bits", MI);
     break;
   }
+  case TargetOpcode::G_VSCALE: {
+    if (!MI->getOperand(1).isCImm()) {
+      report("G_VSCALE operand must be cimm", MI);
+      break;
+    }
+    if (MI->getOperand(1).getCImm()->isZero()) {
+      report("G_VSCALE immediate cannot be zero", MI);
+      break;
+    }
+    break;
+  }
   case TargetOpcode::G_INSERT_SUBVECTOR: {
     const MachineOperand &Src0Op = MI->getOperand(1);
     if (!Src0Op.isReg()) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index ac330918b430a0..c9e5f8924f8a0e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -616,6 +616,9 @@
 # DEBUG-NEXT: G_BRJT (opcode {{[0-9]+}}): 2 type indices
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: G_VSCALE (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_INSERT_SUBVECTOR (opcode {{[0-9]+}}): 2 type indices, 1 imm index
 # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
diff --git a/llvm/test/MachineVerifier/test_g_vscale.mir b/llvm/test/MachineVerifier/test_g_vscale.mir
new file mode 100644
index 00000000000000..78854620913a16
--- /dev/null
+++ b/llvm/test/MachineVerifier/test_g_vscale.mir
@@ -0,0 +1,15 @@
+# RUN: not --crash llc -verify-machineinstrs -run-pass none -o /dev/null %s 2>&1 | FileCheck %s
+
+---
+name:            g_vscale
+body: |
+  bb.0:
+
+  %1:_(s32) = G_CONSTANT i32 4
+
+  ; CHECK: G_VSCALE operand must be cimm
+  %2:_(s32) = G_VSCALE %1
+
+  ; CHECK: G_VSCALE immediate cannot be zero
+  %3:_(s32) = G_VSCALE i32 0
+...

>From f467cc9caf37fcf6b3523271f977585c39372d55 Mon Sep 17 00:00:00 2001
From: Dave Clausen <daveclausen at gmail.com>
Date: Tue, 12 Mar 2024 17:21:00 -0700
Subject: [PATCH 07/10] [tsan] Add missing link option to tsan test after
 #84923

Pull Request: https://github.com/llvm/llvm-project/pull/85003
---
 compiler-rt/test/tsan/compare_exchange_acquire_fence.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/tsan/compare_exchange_acquire_fence.cpp b/compiler-rt/test/tsan/compare_exchange_acquire_fence.cpp
index b9fd0c5ad21f24..404fd2fbb3c226 100644
--- a/compiler-rt/test/tsan/compare_exchange_acquire_fence.cpp
+++ b/compiler-rt/test/tsan/compare_exchange_acquire_fence.cpp
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1
+// RUN: %clangxx_tsan -O1 %s %link_libcxx_tsan -o %t && %run %t 2>&1
 // This is a correct program and tsan should not report a race.
 //
 // Verify that there is a happens-before relationship between a

>From 2aacb56e8361213c1bd69c2ceafdea3aa0ca9125 Mon Sep 17 00:00:00 2001
From: 4ast <alexei.starovoitov at gmail.com>
Date: Tue, 12 Mar 2024 17:27:25 -0700
Subject: [PATCH 08/10] BPF address space insn (#84410)

This commit aims to support BPF arena kernel side
[feature](https://lore.kernel.org/bpf/20240209040608.98927-1-alexei.starovoitov@gmail.com/):
- arena is a memory region accessible from both BPF program and
userspace;
- base pointers for this memory region differ between kernel and user
spaces;
- `dst_reg = addr_space_cast(src_reg, dst_addr_space, src_addr_space)`
translates src_reg, a pointer in src_addr_space to dst_reg, equivalent
pointer in dst_addr_space, {src,dst}_addr_space are immediate constants;
- number 0 is assigned to kernel address space;
- number 1 is assigned to user address space.

On the LLVM side, the goal is to make load and store operations on arena
pointers "transparent" for BPF programs:
- assume that pointers with non-zero address space are pointers to
  arena memory;
- assume that arena is identified by address space number;
- assume that address space zero corresponds to kernel address space;
- assume that every BPF-side load or store from arena is done via
pointer in user address space, thus convert base pointers using
`addr_space_cast(src_reg, 0, 1)`;

Only load, store, cmpxchg and atomicrmw IR instructions are handled by
this transformation.

For example, the following C code:

```c
   #define __as __attribute__((address_space(1)))
   void copy(int __as *from, int __as *to) { *to = *from; }
```

Compiled to the following IR:

```llvm
    define void @copy(ptr addrspace(1) %from, ptr addrspace(1) %to) {
    entry:
      %0 = load i32, ptr addrspace(1) %from, align 4
      store i32 %0, ptr addrspace(1) %to, align 4
      ret void
    }
```

Is transformed to:

```llvm
    %to2 = addrspacecast ptr addrspace(1) %to to ptr     ;; !
    %from1 = addrspacecast ptr addrspace(1) %from to ptr ;; !
    %0 = load i32, ptr %from1, align 4, !tbaa !3
    store i32 %0, ptr %to2, align 4, !tbaa !3
    ret void
```

And compiled as:

```asm
    r2 = addr_space_cast(r2, 0, 1)
    r1 = addr_space_cast(r1, 0, 1)
    r1 = *(u32 *)(r1 + 0)
    *(u32 *)(r2 + 0) = r1
    exit
```

Co-authored-by: Eduard Zingerman <eddyz87 at gmail.com>
---
 clang/lib/Basic/Targets/BPF.cpp               |   3 +
 .../test/Preprocessor/bpf-predefined-macros.c |   8 ++
 .../lib/Target/BPF/AsmParser/BPFAsmParser.cpp |   1 +
 llvm/lib/Target/BPF/BPF.h                     |   8 ++
 .../Target/BPF/BPFASpaceCastSimplifyPass.cpp  |  92 ++++++++++++++
 llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp   | 116 ++++++++++++++++++
 llvm/lib/Target/BPF/BPFInstrInfo.td           |  29 +++++
 llvm/lib/Target/BPF/BPFTargetMachine.cpp      |   5 +
 llvm/lib/Target/BPF/CMakeLists.txt            |   1 +
 .../test/CodeGen/BPF/addr-space-auto-casts.ll |  78 ++++++++++++
 llvm/test/CodeGen/BPF/addr-space-cast.ll      |  22 ++++
 llvm/test/CodeGen/BPF/addr-space-gep-chain.ll |  25 ++++
 llvm/test/CodeGen/BPF/addr-space-globals.ll   |  30 +++++
 llvm/test/CodeGen/BPF/addr-space-globals2.ll  |  25 ++++
 llvm/test/CodeGen/BPF/addr-space-phi.ll       |  53 ++++++++
 .../test/CodeGen/BPF/addr-space-simplify-1.ll |  19 +++
 .../test/CodeGen/BPF/addr-space-simplify-2.ll |  21 ++++
 .../test/CodeGen/BPF/addr-space-simplify-3.ll |  26 ++++
 .../test/CodeGen/BPF/addr-space-simplify-4.ll |  21 ++++
 .../test/CodeGen/BPF/addr-space-simplify-5.ll |  25 ++++
 .../test/CodeGen/BPF/assembler-disassembler.s |   7 ++
 21 files changed, 615 insertions(+)
 create mode 100644 llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-auto-casts.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-cast.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-gep-chain.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-globals.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-globals2.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-phi.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-simplify-1.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-simplify-2.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-simplify-3.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-simplify-4.ll
 create mode 100644 llvm/test/CodeGen/BPF/addr-space-simplify-5.ll

diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index e3fbbb720d0694..26a54f631fcfc4 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -35,6 +35,9 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__BPF_CPU_VERSION__", "0");
     return;
   }
+
+  Builder.defineMacro("__BPF_FEATURE_ARENA_CAST");
+
   if (CPU.empty() || CPU == "generic" || CPU == "v1") {
     Builder.defineMacro("__BPF_CPU_VERSION__", "1");
     return;
diff --git a/clang/test/Preprocessor/bpf-predefined-macros.c b/clang/test/Preprocessor/bpf-predefined-macros.c
index ff4d00ac3bcfcc..fea24d1ea0ff7b 100644
--- a/clang/test/Preprocessor/bpf-predefined-macros.c
+++ b/clang/test/Preprocessor/bpf-predefined-macros.c
@@ -61,6 +61,9 @@ int r;
 #ifdef __BPF_FEATURE_ST
 int s;
 #endif
+#ifdef __BPF_FEATURE_ARENA_CAST
+int t;
+#endif
 
 // CHECK: int b;
 // CHECK: int c;
@@ -90,6 +93,11 @@ int s;
 // CPU_V4: int r;
 // CPU_V4: int s;
 
+// CPU_V1: int t;
+// CPU_V2: int t;
+// CPU_V3: int t;
+// CPU_V4: int t;
+
 // CPU_GENERIC: int g;
 
 // CPU_PROBE: int f;
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 0d1eef60c3b550..3145bc3d19f5dc 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -271,6 +271,7 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("xchg32_32", true)
         .Case("cmpxchg_64", true)
         .Case("cmpxchg32_32", true)
+        .Case("addr_space_cast", true)
         .Default(false);
   }
 };
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 5c77d183e1ef3d..bbdbdbbde53228 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -66,6 +66,14 @@ class BPFIRPeepholePass : public PassInfoMixin<BPFIRPeepholePass> {
   static bool isRequired() { return true; }
 };
 
+class BPFASpaceCastSimplifyPass
+    : public PassInfoMixin<BPFASpaceCastSimplifyPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
+};
+
 class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
 public:
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
diff --git a/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp b/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp
new file mode 100644
index 00000000000000..f87b299bbba658
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFASpaceCastSimplifyPass.cpp
@@ -0,0 +1,92 @@
+//===-- BPFASpaceCastSimplifyPass.cpp - BPF addrspacecast simplications --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include <optional>
+
+#define DEBUG_TYPE "bpf-aspace-simplify"
+
+using namespace llvm;
+
+namespace {
+
+struct CastGEPCast {
+  AddrSpaceCastInst *OuterCast;
+
+  // Match chain of instructions:
+  //   %inner = addrspacecast N->M
+  //   %gep   = getelementptr %inner, ...
+  //   %outer = addrspacecast M->N %gep
+  // Where I is %outer.
+  static std::optional<CastGEPCast> match(Value *I) {
+    auto *OuterCast = dyn_cast<AddrSpaceCastInst>(I);
+    if (!OuterCast)
+      return std::nullopt;
+    auto *GEP = dyn_cast<GetElementPtrInst>(OuterCast->getPointerOperand());
+    if (!GEP)
+      return std::nullopt;
+    auto *InnerCast = dyn_cast<AddrSpaceCastInst>(GEP->getPointerOperand());
+    if (!InnerCast)
+      return std::nullopt;
+    if (InnerCast->getSrcAddressSpace() != OuterCast->getDestAddressSpace())
+      return std::nullopt;
+    if (InnerCast->getDestAddressSpace() != OuterCast->getSrcAddressSpace())
+      return std::nullopt;
+    return CastGEPCast{OuterCast};
+  }
+
+  static PointerType *changeAddressSpace(PointerType *Ty, unsigned AS) {
+    return Ty->get(Ty->getContext(), AS);
+  }
+
+  // Assuming match(this->OuterCast) is true, convert:
+  //   (addrspacecast M->N (getelementptr (addrspacecast N->M ptr) ...))
+  // To:
+  //   (getelementptr ptr ...)
+  GetElementPtrInst *rewrite() {
+    auto *GEP = cast<GetElementPtrInst>(OuterCast->getPointerOperand());
+    auto *InnerCast = cast<AddrSpaceCastInst>(GEP->getPointerOperand());
+    unsigned AS = OuterCast->getDestAddressSpace();
+    auto *NewGEP = cast<GetElementPtrInst>(GEP->clone());
+    NewGEP->setName(GEP->getName());
+    NewGEP->insertAfter(OuterCast);
+    NewGEP->setOperand(0, InnerCast->getPointerOperand());
+    auto *GEPTy = cast<PointerType>(GEP->getType());
+    NewGEP->mutateType(changeAddressSpace(GEPTy, AS));
+    OuterCast->replaceAllUsesWith(NewGEP);
+    OuterCast->eraseFromParent();
+    if (GEP->use_empty())
+      GEP->eraseFromParent();
+    if (InnerCast->use_empty())
+      InnerCast->eraseFromParent();
+    return NewGEP;
+  }
+};
+
+} // anonymous namespace
+
+PreservedAnalyses BPFASpaceCastSimplifyPass::run(Function &F,
+                                                 FunctionAnalysisManager &AM) {
+  SmallVector<CastGEPCast, 16> WorkList;
+  bool Changed = false;
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB)
+      if (auto It = CastGEPCast::match(&I))
+        WorkList.push_back(It.value());
+    Changed |= !WorkList.empty();
+
+    while (!WorkList.empty()) {
+      CastGEPCast InsnChain = WorkList.pop_back_val();
+      GetElementPtrInst *NewGEP = InsnChain.rewrite();
+      for (User *U : NewGEP->users())
+        if (auto It = CastGEPCast::match(U))
+          WorkList.push_back(It.value());
+    }
+  }
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index 81effc9b1db46c..edd59aaa6d01d2 100644
--- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -14,6 +14,8 @@
 //     optimizations are done and those builtins can be removed.
 //   - remove llvm.bpf.getelementptr.and.load builtins.
 //   - remove llvm.bpf.getelementptr.and.store builtins.
+//   - for loads and stores with base addresses from non-zero address space
+//     cast base address to zero address space (support for BPF arenas).
 //
 //===----------------------------------------------------------------------===//
 
@@ -55,6 +57,7 @@ class BPFCheckAndAdjustIR final : public ModulePass {
   bool removeCompareBuiltin(Module &M);
   bool sinkMinMax(Module &M);
   bool removeGEPBuiltins(Module &M);
+  bool insertASpaceCasts(Module &M);
 };
 } // End anonymous namespace
 
@@ -416,11 +419,124 @@ bool BPFCheckAndAdjustIR::removeGEPBuiltins(Module &M) {
   return Changed;
 }
 
+// Wrap ToWrap with cast to address space zero:
+// - if ToWrap is a getelementptr,
+//   wrap it's base pointer instead and return a copy;
+// - if ToWrap is Instruction, insert address space cast
+//   immediately after ToWrap;
+// - if ToWrap is not an Instruction (function parameter
+//   or a global value), insert address space cast at the
+//   beginning of the Function F;
+// - use Cache to avoid inserting too many casts;
+static Value *aspaceWrapValue(DenseMap<Value *, Value *> &Cache, Function *F,
+                              Value *ToWrap) {
+  auto It = Cache.find(ToWrap);
+  if (It != Cache.end())
+    return It->getSecond();
+
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(ToWrap)) {
+    Value *Ptr = GEP->getPointerOperand();
+    Value *WrappedPtr = aspaceWrapValue(Cache, F, Ptr);
+    auto *GEPTy = cast<PointerType>(GEP->getType());
+    auto *NewGEP = GEP->clone();
+    NewGEP->insertAfter(GEP);
+    NewGEP->mutateType(GEPTy->getPointerTo(0));
+    NewGEP->setOperand(GEP->getPointerOperandIndex(), WrappedPtr);
+    NewGEP->setName(GEP->getName());
+    Cache[ToWrap] = NewGEP;
+    return NewGEP;
+  }
+
+  IRBuilder IB(F->getContext());
+  if (Instruction *InsnPtr = dyn_cast<Instruction>(ToWrap))
+    IB.SetInsertPoint(*InsnPtr->getInsertionPointAfterDef());
+  else
+    IB.SetInsertPoint(F->getEntryBlock().getFirstInsertionPt());
+  auto *PtrTy = cast<PointerType>(ToWrap->getType());
+  auto *ASZeroPtrTy = PtrTy->getPointerTo(0);
+  auto *ACast = IB.CreateAddrSpaceCast(ToWrap, ASZeroPtrTy, ToWrap->getName());
+  Cache[ToWrap] = ACast;
+  return ACast;
+}
+
+// Wrap a pointer operand OpNum of instruction I
+// with cast to address space zero
+static void aspaceWrapOperand(DenseMap<Value *, Value *> &Cache, Instruction *I,
+                              unsigned OpNum) {
+  Value *OldOp = I->getOperand(OpNum);
+  if (OldOp->getType()->getPointerAddressSpace() == 0)
+    return;
+
+  Value *NewOp = aspaceWrapValue(Cache, I->getFunction(), OldOp);
+  I->setOperand(OpNum, NewOp);
+  // Check if there are any remaining users of old GEP,
+  // delete those w/o users
+  for (;;) {
+    auto *OldGEP = dyn_cast<GetElementPtrInst>(OldOp);
+    if (!OldGEP)
+      break;
+    if (!OldGEP->use_empty())
+      break;
+    OldOp = OldGEP->getPointerOperand();
+    OldGEP->eraseFromParent();
+  }
+}
+
+// Support for BPF arenas:
+// - for each function in the module M, update pointer operand of
+//   each memory access instruction (load/store/cmpxchg/atomicrmw)
+//   by casting it from non-zero address space to zero address space, e.g:
+//
+//   (load (ptr addrspace (N) %p) ...)
+//     -> (load (addrspacecast ptr addrspace (N) %p to ptr))
+//
+// - assign section with name .arena.N for globals defined in
+//   non-zero address space N
+bool BPFCheckAndAdjustIR::insertASpaceCasts(Module &M) {
+  bool Changed = false;
+  for (Function &F : M) {
+    DenseMap<Value *, Value *> CastsCache;
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : BB) {
+        unsigned PtrOpNum;
+
+        if (auto *LD = dyn_cast<LoadInst>(&I))
+          PtrOpNum = LD->getPointerOperandIndex();
+        else if (auto *ST = dyn_cast<StoreInst>(&I))
+          PtrOpNum = ST->getPointerOperandIndex();
+        else if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(&I))
+          PtrOpNum = CmpXchg->getPointerOperandIndex();
+        else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+          PtrOpNum = RMW->getPointerOperandIndex();
+        else
+          continue;
+
+        aspaceWrapOperand(CastsCache, &I, PtrOpNum);
+      }
+    }
+    Changed |= !CastsCache.empty();
+  }
+  // Merge all globals within same address space into single
+  // .arena.<addr space no> section
+  for (GlobalVariable &G : M.globals()) {
+    if (G.getAddressSpace() == 0 || G.hasSection())
+      continue;
+    SmallString<16> SecName;
+    raw_svector_ostream OS(SecName);
+    OS << ".arena." << G.getAddressSpace();
+    G.setSection(SecName);
+    // Prevent having separate section for constants
+    G.setConstant(false);
+  }
+  return Changed;
+}
+
 bool BPFCheckAndAdjustIR::adjustIR(Module &M) {
   bool Changed = removePassThroughBuiltin(M);
   Changed = removeCompareBuiltin(M) || Changed;
   Changed = sinkMinMax(M) || Changed;
   Changed = removeGEPBuiltins(M) || Changed;
+  Changed = insertASpaceCasts(M) || Changed;
   return Changed;
 }
 
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 82d34702310668..7198e9499bc32a 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -420,6 +420,35 @@ let Predicates = [BPFHasMovsx] in {
 }
 }
 
+def ADDR_SPACE_CAST
+    : ALU_RR<BPF_ALU64, BPF_MOV, 64,
+             (outs GPR:$dst),
+             (ins GPR:$src, i64imm:$dst_as, i64imm:$src_as),
+             "$dst = addr_space_cast($src, $dst_as, $src_as)",
+             []> {
+  bits<64> dst_as;
+  bits<64> src_as;
+
+  let Inst{47-32} = 1;
+  let Inst{31-16} = dst_as{15-0};
+  let Inst{15-0} = src_as{15-0};
+}
+
+def SrcAddrSpace : SDNodeXForm<addrspacecast, [{
+  return CurDAG->getTargetConstant(
+    cast<AddrSpaceCastSDNode>(N)->getSrcAddressSpace(),
+    SDLoc(N), MVT::i64);
+}]>;
+
+def DstAddrSpace : SDNodeXForm<addrspacecast, [{
+  return CurDAG->getTargetConstant(
+    cast<AddrSpaceCastSDNode>(N)->getDestAddressSpace(),
+    SDLoc(N), MVT::i64);
+}]>;
+
+def : Pat<(addrspacecast:$this GPR:$src),
+          (ADDR_SPACE_CAST $src, (DstAddrSpace $this), (SrcAddrSpace $this))>;
+
 def FI_ri
     : TYPE_LD_ST<BPF_IMM.Value, BPF_DW.Value,
                  (outs GPR:$dst),
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 08ac4b25540f70..5f26bec2e390c8 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -121,6 +121,10 @@ void BPFTargetMachine::registerPassBuilderCallbacks(
           FPM.addPass(BPFPreserveStaticOffsetPass(false));
           return true;
         }
+        if (PassName == "bpf-aspace-simplify") {
+          FPM.addPass(BPFASpaceCastSimplifyPass());
+          return true;
+        }
         return false;
       });
   PB.registerPipelineStartEPCallback(
@@ -135,6 +139,7 @@ void BPFTargetMachine::registerPassBuilderCallbacks(
   PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM,
                                     OptimizationLevel Level) {
     FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
+    FPM.addPass(BPFASpaceCastSimplifyPass());
   });
   PB.registerScalarOptimizerLateEPCallback(
       [=](FunctionPassManager &FPM, OptimizationLevel Level) {
diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
index d88e7ade40b9a0..cb21ed03a86c1e 100644
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -24,6 +24,7 @@ add_llvm_target(BPFCodeGen
   BPFAbstractMemberAccess.cpp
   BPFAdjustOpt.cpp
   BPFAsmPrinter.cpp
+  BPFASpaceCastSimplifyPass.cpp
   BPFCheckAndAdjustIR.cpp
   BPFFrameLowering.cpp
   BPFInstrInfo.cpp
diff --git a/llvm/test/CodeGen/BPF/addr-space-auto-casts.ll b/llvm/test/CodeGen/BPF/addr-space-auto-casts.ll
new file mode 100644
index 00000000000000..08e11e861c71cb
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-auto-casts.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+define void @simple_store(ptr addrspace(272) %foo) {
+; CHECK-LABEL: define void @simple_store(
+; CHECK-SAME: ptr addrspace(272) [[FOO:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FOO1:%.*]] = addrspacecast ptr addrspace(272) [[FOO]] to ptr
+; CHECK-NEXT:    [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[FOO1]], i64 16
+; CHECK-NEXT:    store volatile i32 57005, ptr [[ADD_PTR2]], align 4
+; CHECK-NEXT:    [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr [[FOO1]], i64 12
+; CHECK-NEXT:    store volatile i32 48879, ptr [[ADD_PTR13]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 16
+  store volatile i32 57005, ptr addrspace(272) %add.ptr, align 4
+  %add.ptr1 = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 12
+  store volatile i32 48879, ptr addrspace(272) %add.ptr1, align 4
+  ret void
+}
+
+define void @separate_addr_store(ptr addrspace(272) %foo, ptr addrspace(272) %bar) {
+; CHECK-LABEL: define void @separate_addr_store(
+; CHECK-SAME: ptr addrspace(272) [[FOO:%.*]], ptr addrspace(272) [[BAR:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BAR3:%.*]] = addrspacecast ptr addrspace(272) [[BAR]] to ptr
+; CHECK-NEXT:    [[FOO1:%.*]] = addrspacecast ptr addrspace(272) [[FOO]] to ptr
+; CHECK-NEXT:    [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[FOO1]], i64 16
+; CHECK-NEXT:    store volatile i32 57005, ptr [[ADD_PTR2]], align 4
+; CHECK-NEXT:    [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr [[BAR3]], i64 12
+; CHECK-NEXT:    store volatile i32 48879, ptr [[ADD_PTR14]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(272) %foo, i64 16
+  store volatile i32 57005, ptr addrspace(272) %add.ptr, align 4
+  %add.ptr1 = getelementptr inbounds i8, ptr addrspace(272) %bar, i64 12
+  store volatile i32 48879, ptr addrspace(272) %add.ptr1, align 4
+  ret void
+}
+
+define i32 @simple_load(ptr addrspace(272) %foo) {
+; CHECK-LABEL: define i32 @simple_load(
+; CHECK-SAME: ptr addrspace(272) [[FOO:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[FOO1:%.*]] = addrspacecast ptr addrspace(272) [[FOO]] to ptr
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[FOO1]], align 4
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %0 = load i32, ptr addrspace(272) %foo, align 4
+  ret i32 %0
+}
+
+define { i32, i1 } @simple_cmpxchg(ptr addrspace(1) %i) {
+; CHECK-LABEL: define { i32, i1 } @simple_cmpxchg(
+; CHECK-SAME: ptr addrspace(1) [[I:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I1:%.*]] = addrspacecast ptr addrspace(1) [[I]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = cmpxchg ptr [[I1]], i32 7, i32 42 monotonic monotonic, align 4
+; CHECK-NEXT:    ret { i32, i1 } [[A]]
+;
+entry:
+  %a = cmpxchg ptr addrspace(1) %i, i32 7, i32 42 monotonic monotonic, align 4
+  ret { i32, i1 } %a
+}
+
+define void @simple_atomicrmw(ptr addrspace(1) %p) {
+; CHECK-LABEL: define void @simple_atomicrmw(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:    [[P1:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
+; CHECK-NEXT:    [[A:%.*]] = atomicrmw add ptr [[P1]], i64 42 monotonic, align 8
+; CHECK-NEXT:    ret void
+;
+  %a = atomicrmw add ptr addrspace(1) %p, i64 42 monotonic, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-cast.ll b/llvm/test/CodeGen/BPF/addr-space-cast.ll
new file mode 100644
index 00000000000000..ad2860d8038ea7
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-cast.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -march=bpfel -mcpu=v4 -filetype=asm -show-mc-encoding < %s | FileCheck %s
+
+define ptr addrspace(1) @foo(ptr %p) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    r0 = addr_space_cast(r1, 1, 0) # encoding: [0xbf,0x10,0x01,0x00,0x00,0x00,0x01,0x00]
+; CHECK-NEXT:    exit                           # encoding: [0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+entry:
+  %0 = addrspacecast ptr %p to ptr addrspace(1)
+  ret ptr addrspace(1) %0
+}
+
+define ptr @bar(ptr addrspace(1) %p) {
+; CHECK-LABEL: bar:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    r0 = addr_space_cast(r1, 0, 1) # encoding: [0xbf,0x10,0x01,0x00,0x01,0x00,0x00,0x00]
+; CHECK-NEXT:    exit                           # encoding: [0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+entry:
+  %0 = addrspacecast ptr addrspace(1) %p to ptr
+  ret ptr %0
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll b/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll
new file mode 100644
index 00000000000000..3ac85fb9b12662
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-gep-chain.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+define void @test(ptr addrspace(1) %p) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME:    ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P1:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
+; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 8
+; CHECK-NEXT:    [[B3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 16
+; CHECK-NEXT:    [[C4:%.*]] = getelementptr inbounds i8, ptr [[B3]], i64 24
+; CHECK-NEXT:    [[D5:%.*]] = getelementptr inbounds i8, ptr [[C4]], i64 32
+; CHECK-NEXT:    store i64 11, ptr [[C4]], align 8
+; CHECK-NEXT:    store i64 22, ptr [[D5]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = getelementptr inbounds i8, ptr addrspace(1) %p, i64 8
+  %b = getelementptr inbounds i8, ptr addrspace(1) %a, i64 16
+  %c = getelementptr inbounds i8, ptr addrspace(1) %b, i64 24
+  %d = getelementptr inbounds i8, ptr addrspace(1) %c, i64 32
+  store i64 11, ptr addrspace(1) %c, align 8
+  store i64 22, ptr addrspace(1) %d, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-globals.ll b/llvm/test/CodeGen/BPF/addr-space-globals.ll
new file mode 100644
index 00000000000000..878ba0dfce6cd1
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-globals.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __as __attribute__((address_space(272)))
+;   __as const char a[2] = {1,2};
+;   __as char b[2] = {3,4};
+;   __as char c[2];
+;
+; Using the following command:
+;
+;   clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+
+ at a = dso_local local_unnamed_addr addrspace(272) constant [2 x i8] [i8 1, i8 2], align 1
+ at b = dso_local local_unnamed_addr addrspace(272) global [2 x i8] [i8 3, i8 4], align 1
+ at c = dso_local local_unnamed_addr addrspace(272) global [2 x i8] zeroinitializer, align 1
+
+; Verify that a,b,c reside in the same section
+
+; CHECK:     .section .arena.272,"aw", at progbits
+; CHECK-NOT: .section
+; CHECK:     .globl  a
+; CHECK:     .ascii  "\001\002"
+; CHECK-NOT: .section
+; CHECK:     .globl  b
+; CHECK:     .ascii  "\003\004"
+; CHECK-NOT: .section
+; CHECK:     .globl  c
+; CHECK:     .zero   2
diff --git a/llvm/test/CodeGen/BPF/addr-space-globals2.ll b/llvm/test/CodeGen/BPF/addr-space-globals2.ll
new file mode 100644
index 00000000000000..d1e2318948751e
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-globals2.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=bpfel -mcpu=v4 < %s | FileCheck %s
+
+; Generated from the following C code:
+;
+;   __attribute__((address_space(1))) char a[2] = {1,2};
+;   __attribute__((address_space(2))) char b[2] = {3,4};
+;
+; Using the following command:
+;
+;   clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+ at a = dso_local local_unnamed_addr addrspace(1) global [2 x i8] [i8 1, i8 2], align 1
+ at b = dso_local local_unnamed_addr addrspace(2) global [2 x i8] [i8 3, i8 4], align 1
+
+; Verify that a,b reside in separate sections
+
+; CHECK:     .section .arena.1,"aw", at progbits
+; CHECK-NOT: .section
+; CHECK:     .globl  a
+; CHECK:     .ascii  "\001\002"
+
+; CHECK:     .section .arena.2,"aw", at progbits
+; CHECK-NOT: .section
+; CHECK:     .globl  b
+; CHECK:     .ascii  "\003\004"
diff --git a/llvm/test/CodeGen/BPF/addr-space-phi.ll b/llvm/test/CodeGen/BPF/addr-space-phi.ll
new file mode 100644
index 00000000000000..6d28b071f28086
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-phi.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+; Generated from the following C code:
+;
+;   #define __uptr __attribute__((address_space(1)))
+;
+;   extern int __uptr *magic1();
+;   extern int __uptr *magic2();
+;
+;   void test(long i) {
+;     int __uptr *a;
+;
+;     if (i > 42)
+;       a = magic1();
+;     else
+;       a = magic2();
+;     a[5] = 7;
+;   }
+;
+; Using the following command:
+;
+;   clang --target=bpf -O2 -S -emit-llvm -o t.ll t.c
+
+define void @test(i64 noundef %i) {
+; CHECK:       if.end:
+; CHECK-NEXT:    [[A_0:%.*]] = phi ptr addrspace(1)
+; CHECK-NEXT:    [[A_01:%.*]] = addrspacecast ptr addrspace(1) [[A_0]] to ptr
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A_01]], i64 5
+; CHECK-NEXT:    store i32 7, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp = icmp sgt i64 %i, 42
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %call = tail call ptr addrspace(1) @magic1()
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %call1 = tail call ptr addrspace(1) @magic2()
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %a.0 = phi ptr addrspace(1) [ %call, %if.then ], [ %call1, %if.else ]
+  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %a.0, i64 5
+  store i32 7, ptr addrspace(1) %arrayidx, align 4
+  ret void
+}
+
+declare ptr addrspace(1) @magic1(...)
+declare ptr addrspace(1) @magic2(...)
diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll
new file mode 100644
index 00000000000000..32d67284d1c1b7
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-simplify-1.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s
+
+; Check that bpf-aspace-simplify pass removes unnecessary (for BPF)
+; address space casts for cast M->N -> GEP -> cast N->M chain.
+
+define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) {
+; CHECK-LABEL: define dso_local ptr addrspace(1) @test(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8
+; CHECK-NEXT:    ret ptr addrspace(1) [[B1]]
+;
+  entry:
+  %a = addrspacecast ptr addrspace(1) %p to ptr
+  %b = getelementptr inbounds i8, ptr %a, i64 8
+  %c = addrspacecast ptr %b to ptr addrspace(1)
+  ret ptr addrspace(1) %c
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll
new file mode 100644
index 00000000000000..a2965554a97330
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-simplify-2.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s
+
+; Check that bpf-aspace-simplify pass does not change
+; chain 'cast M->N -> GEP -> cast N->K'.
+
+define dso_local ptr addrspace(2) @test (ptr addrspace(1) %p) {
+; CHECK-LABEL: define dso_local ptr addrspace(2) @test(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; CHECK-NEXT:    [[C:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(2)
+; CHECK-NEXT:    ret ptr addrspace(2) [[C]]
+;
+  entry:
+  %a = addrspacecast ptr addrspace(1) %p to ptr
+  %b = getelementptr inbounds i8, ptr %a, i64 8
+  %c = addrspacecast ptr %b to ptr addrspace(2)
+  ret ptr addrspace(2) %c
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll
new file mode 100644
index 00000000000000..a7736c462b44b3
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-simplify-3.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s
+
+; Check that when bpf-aspace-simplify pass modifies chain
+; 'cast M->N -> GEP -> cast N->M' it does not remove GEP,
+; when that GEP is used by some other instruction.
+
+define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) {
+; CHECK-LABEL: define dso_local ptr addrspace(1) @test(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; CHECK-NEXT:    [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8
+; CHECK-NEXT:    call void @sink(ptr [[B]])
+; CHECK-NEXT:    ret ptr addrspace(1) [[B1]]
+;
+  entry:
+  %a = addrspacecast ptr addrspace(1) %p to ptr
+  %b = getelementptr inbounds i8, ptr %a, i64 8
+  %c = addrspacecast ptr %b to ptr addrspace(1)
+  call void @sink(ptr %b)
+  ret ptr addrspace(1) %c
+}
+
+declare dso_local void @sink(ptr)
diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll
new file mode 100644
index 00000000000000..b2c384bbb6abd3
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-simplify-4.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s
+
+; Check that bpf-aspace-simplify pass simplifies chain
+; 'cast K->M -> cast M->N -> GEP -> cast N->M -> cast M->K'.
+
+define dso_local ptr addrspace(2) @test (ptr addrspace(2) %p) {
+; CHECK-LABEL: define dso_local ptr addrspace(2) @test(
+; CHECK-SAME: ptr addrspace(2) [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C12:%.*]] = getelementptr inbounds i8, ptr addrspace(2) [[P]], i64 8
+; CHECK-NEXT:    ret ptr addrspace(2) [[C12]]
+;
+  entry:
+  %a = addrspacecast ptr addrspace(2) %p to ptr addrspace(1)
+  %b = addrspacecast ptr addrspace(1) %a to ptr
+  %c = getelementptr inbounds i8, ptr %b, i64 8
+  %d = addrspacecast ptr %c to ptr addrspace(1)
+  %e = addrspacecast ptr addrspace (1) %d to ptr addrspace(2)
+  ret ptr addrspace(2) %e
+}
diff --git a/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll b/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll
new file mode 100644
index 00000000000000..b62d25384d9583
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-simplify-5.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=bpf-aspace-simplify -mtriple=bpf-pc-linux -S < %s | FileCheck %s
+
+; Check that bpf-aspace-simplify pass removes unnecessary (for BPF)
+; address space casts for cast M->N -> GEP -> cast N->M chain,
+; where chain is split between several BBs.
+
+define dso_local ptr addrspace(1) @test (ptr addrspace(1) %p) {
+; CHECK-LABEL: define dso_local ptr addrspace(1) @test(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[B1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[P]], i64 8
+; CHECK-NEXT:    ret ptr addrspace(1) [[B1]]
+;
+entry:
+  %a = addrspacecast ptr addrspace(1) %p to ptr
+  %b = getelementptr inbounds i8, ptr %a, i64 8
+  br label %exit
+
+exit:
+  %c = addrspacecast ptr %b to ptr addrspace(1)
+  ret ptr addrspace(1) %c
+}
diff --git a/llvm/test/CodeGen/BPF/assembler-disassembler.s b/llvm/test/CodeGen/BPF/assembler-disassembler.s
index 2bc7421c2471c2..991d6edc683a30 100644
--- a/llvm/test/CodeGen/BPF/assembler-disassembler.s
+++ b/llvm/test/CodeGen/BPF/assembler-disassembler.s
@@ -289,3 +289,10 @@ r0 = *(u32*)skb[42]
 r0 = *(u8*)skb[r1]
 r0 = *(u16*)skb[r1]
 r0 = *(u32*)skb[r1]
+
+// CHECK: bf 10 01 00 01 00 00 00	r0 = addr_space_cast(r1, 0x0, 0x1)
+// CHECK: bf 21 01 00 00 00 01 00	r1 = addr_space_cast(r2, 0x1, 0x0)
+// CHECK: bf 43 01 00 2a 00 07 00	r3 = addr_space_cast(r4, 0x7, 0x2a)
+r0 = addr_space_cast(r1, 0, 1)
+r1 = addr_space_cast(r2, 1, 0)
+r3 = addr_space_cast(r4, 7, 42)

>From 883628ad6c52f515d4d0aa1b5ed24f92ce26a75e Mon Sep 17 00:00:00 2001
From: Yinying Li <yinyingli at google.com>
Date: Tue, 12 Mar 2024 23:35:22 +0000
Subject: [PATCH 09/10] migrate more

---
 .../SparseTensor/CPU/sparse_conversion.mlir   | 367 ++++++++----------
 .../CPU/sparse_conversion_block.mlir          |  70 ++--
 .../CPU/sparse_conversion_dyn.mlir            | 122 +++---
 .../CPU/sparse_conversion_element.mlir        |   4 +-
 .../CPU/sparse_conversion_ptr.mlir            | 144 ++++---
 .../CPU/sparse_conversion_sparse2dense.mlir   |   4 +-
 .../CPU/sparse_conversion_sparse2sparse.mlir  |   4 +-
 .../SparseTensor/CPU/sparse_coo_test.mlir     |  66 ++--
 .../CPU/sparse_dilated_conv_2d_nhwc_hwcf.mlir |   4 +-
 .../Dialect/SparseTensor/CPU/sparse_dot.mlir  |  38 +-
 .../SparseTensor/CPU/sparse_expand.mlir       |  32 +-
 .../SparseTensor/CPU/sparse_expand_shape.mlir | 123 ++++--
 .../CPU/sparse_filter_conv2d.mlir             |  32 +-
 .../SparseTensor/CPU/sparse_flatten.mlir      |   4 +-
 .../CPU/sparse_foreach_slices.mlir            |   4 +-
 .../SparseTensor/CPU/sparse_generate.mlir     |  20 +-
 .../SparseTensor/CPU/sparse_index.mlir        | 163 +++++---
 .../SparseTensor/CPU/sparse_index_dense.mlir  |   4 +-
 .../SparseTensor/CPU/sparse_insert_3d.mlir    |   4 +-
 19 files changed, 671 insertions(+), 538 deletions(-)

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion.mlir
index f13c1c66df6dce..8024c128189597 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -46,28 +46,10 @@
 // Integration test that tests conversions between sparse tensors.
 //
 module {
-  //
-  // Output utilities.
-  //
-  func.func @dumpf64(%arg0: memref<?xf64>) {
-    %c0 = arith.constant 0 : index
-    %d0 = arith.constant -1.0 : f64
-    %0 = vector.transfer_read %arg0[%c0], %d0: memref<?xf64>, vector<24xf64>
-    vector.print %0 : vector<24xf64>
-    return
-  }
-  func.func @dumpidx(%arg0: memref<?xindex>) {
-    %c0 = arith.constant 0 : index
-    %d0 = arith.constant 0 : index
-    %0 = vector.transfer_read %arg0[%c0], %d0: memref<?xindex>, vector<25xindex>
-    vector.print %0 : vector<25xindex>
-    return
-  }
-
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     %c0 = arith.constant 0 : index
     %c1 = arith.constant 1 : index
     %c2 = arith.constant 2 : index
@@ -110,195 +92,176 @@ module {
     %i = sparse_tensor.convert %3 : tensor<2x3x4xf64, #Tensor3> to tensor<2x3x4xf64, #Tensor3>
 
     //
-    // Check number_of_entries.
+    // Verify the outputs.
     //
-    // CHECK-COUNT-12: 24
-    %nv1 = sparse_tensor.number_of_entries %1 : tensor<2x3x4xf64, #Tensor1>
-    %nv2 = sparse_tensor.number_of_entries %2 : tensor<2x3x4xf64, #Tensor2>
-    %nv3 = sparse_tensor.number_of_entries %3 : tensor<2x3x4xf64, #Tensor3>
-    %nav = sparse_tensor.number_of_entries %a : tensor<2x3x4xf64, #Tensor1>
-    %nbv = sparse_tensor.number_of_entries %b : tensor<2x3x4xf64, #Tensor1>
-    %ncv = sparse_tensor.number_of_entries %c : tensor<2x3x4xf64, #Tensor1>
-    %ndv = sparse_tensor.number_of_entries %d : tensor<2x3x4xf64, #Tensor2>
-    %nev = sparse_tensor.number_of_entries %e : tensor<2x3x4xf64, #Tensor2>
-    %nfv = sparse_tensor.number_of_entries %f : tensor<2x3x4xf64, #Tensor2>
-    %ngv = sparse_tensor.number_of_entries %g : tensor<2x3x4xf64, #Tensor3>
-    %nhv = sparse_tensor.number_of_entries %h : tensor<2x3x4xf64, #Tensor3>
-    %niv = sparse_tensor.number_of_entries %i : tensor<2x3x4xf64, #Tensor3>
-    vector.print %nv1 : index
-    vector.print %nv2 : index
-    vector.print %nv3 : index
-    vector.print %nav : index
-    vector.print %nbv : index
-    vector.print %ncv : index
-    vector.print %ndv : index
-    vector.print %nev : index
-    vector.print %nfv : index
-    vector.print %ngv : index
-    vector.print %nhv : index
-    vector.print %niv : index
-
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 2, 3, 4 )
+    // CHECK-NEXT: pos[0] : ( 0, 2
+    // CHECK-NEXT: crd[0] : ( 0, 1
+    // CHECK-NEXT: pos[1] : ( 0, 3, 6
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2
+    // CHECK-NEXT: pos[2] : ( 0, 4, 8, 12, 16, 20, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
+    // CHECK-NEXT: ----
     //
-    // Check values.
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 3, 4, 2 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: pos[2] : ( 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: values : ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24
+    // CHECK-NEXT: ----
     //
-    // CHECK:      ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 )
-    // CHECK-NEXT: ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24 )
-    // CHECK-NEXT: ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24 )
-    // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 )
-    // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 )
-    // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 )
-    // CHECK-NEXT: ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24 )
-    // CHECK-NEXT: ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24 )
-    // CHECK-NEXT: ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24 )
-    // CHECK-NEXT: ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24 )
-    // CHECK-NEXT: ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24 )
-    // CHECK-NEXT: ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24 )
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 4, 2, 3 )
+    // CHECK-NEXT: pos[0] : ( 0, 4
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6, 8
+    // CHECK-NEXT: crd[1] : ( 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: pos[2] : ( 0, 3, 6, 9, 12, 15, 18, 21, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2
+    // CHECK-NEXT: values : ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24
+    // CHECK-NEXT: ----
     //
-    %v1 = sparse_tensor.values %1 : tensor<2x3x4xf64, #Tensor1> to memref<?xf64>
-    %v2 = sparse_tensor.values %2 : tensor<2x3x4xf64, #Tensor2> to memref<?xf64>
-    %v3 = sparse_tensor.values %3 : tensor<2x3x4xf64, #Tensor3> to memref<?xf64>
-    %av = sparse_tensor.values %a : tensor<2x3x4xf64, #Tensor1> to memref<?xf64>
-    %bv = sparse_tensor.values %b : tensor<2x3x4xf64, #Tensor1> to memref<?xf64>
-    %cv = sparse_tensor.values %c : tensor<2x3x4xf64, #Tensor1> to memref<?xf64>
-    %dv = sparse_tensor.values %d : tensor<2x3x4xf64, #Tensor2> to memref<?xf64>
-    %ev = sparse_tensor.values %e : tensor<2x3x4xf64, #Tensor2> to memref<?xf64>
-    %fv = sparse_tensor.values %f : tensor<2x3x4xf64, #Tensor2> to memref<?xf64>
-    %gv = sparse_tensor.values %g : tensor<2x3x4xf64, #Tensor3> to memref<?xf64>
-    %hv = sparse_tensor.values %h : tensor<2x3x4xf64, #Tensor3> to memref<?xf64>
-    %iv = sparse_tensor.values %i : tensor<2x3x4xf64, #Tensor3> to memref<?xf64>
-
-    call @dumpf64(%v1) : (memref<?xf64>) -> ()
-    call @dumpf64(%v2) : (memref<?xf64>) -> ()
-    call @dumpf64(%v3) : (memref<?xf64>) -> ()
-    call @dumpf64(%av) : (memref<?xf64>) -> ()
-    call @dumpf64(%bv) : (memref<?xf64>) -> ()
-    call @dumpf64(%cv) : (memref<?xf64>) -> ()
-    call @dumpf64(%dv) : (memref<?xf64>) -> ()
-    call @dumpf64(%ev) : (memref<?xf64>) -> ()
-    call @dumpf64(%fv) : (memref<?xf64>) -> ()
-    call @dumpf64(%gv) : (memref<?xf64>) -> ()
-    call @dumpf64(%hv) : (memref<?xf64>) -> ()
-    call @dumpf64(%iv) : (memref<?xf64>) -> ()
-
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 2, 3, 4 )
+    // CHECK-NEXT: pos[0] : ( 0, 2
+    // CHECK-NEXT: crd[0] : ( 0, 1
+    // CHECK-NEXT: pos[1] : ( 0, 3, 6
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2
+    // CHECK-NEXT: pos[2] : ( 0, 4, 8, 12, 16, 20, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
+    // CHECK-NEXT: ----
     //
-    // Check coordinates.
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 2, 3, 4 )
+    // CHECK-NEXT: pos[0] : ( 0, 2
+    // CHECK-NEXT: crd[0] : ( 0, 1
+    // CHECK-NEXT: pos[1] : ( 0, 3, 6
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2
+    // CHECK-NEXT: pos[2] : ( 0, 4, 8, 12, 16, 20, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
+    // CHECK-NEXT: ----
     //
-    // CHECK-NEXT: ( 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-    // CHECK-NEXT: ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 )
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 2, 3, 4 )
+    // CHECK-NEXT: pos[0] : ( 0, 2
+    // CHECK-NEXT: crd[0] : ( 0, 1
+    // CHECK-NEXT: pos[1] : ( 0, 3, 6
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2
+    // CHECK-NEXT: pos[2] : ( 0, 4, 8, 12, 16, 20, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
+    // CHECK-NEXT: ----
     //
-    %v10 = sparse_tensor.coordinates %1 { level = 0 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %v11 = sparse_tensor.coordinates %1 { level = 1 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %v12 = sparse_tensor.coordinates %1 { level = 2 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %v20 = sparse_tensor.coordinates %2 { level = 0 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %v21 = sparse_tensor.coordinates %2 { level = 1 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %v22 = sparse_tensor.coordinates %2 { level = 2 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %v30 = sparse_tensor.coordinates %3 { level = 0 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-    %v31 = sparse_tensor.coordinates %3 { level = 1 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-    %v32 = sparse_tensor.coordinates %3 { level = 2 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-
-    %a10 = sparse_tensor.coordinates %a { level = 0 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %a11 = sparse_tensor.coordinates %a { level = 1 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %a12 = sparse_tensor.coordinates %a { level = 2 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %b10 = sparse_tensor.coordinates %b { level = 0 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %b11 = sparse_tensor.coordinates %b { level = 1 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %b12 = sparse_tensor.coordinates %b { level = 2 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %c10 = sparse_tensor.coordinates %c { level = 0 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %c11 = sparse_tensor.coordinates %c { level = 1 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-    %c12 = sparse_tensor.coordinates %c { level = 2 : index } : tensor<2x3x4xf64, #Tensor1> to memref<?xindex>
-
-    %d20 = sparse_tensor.coordinates %d { level = 0 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %d21 = sparse_tensor.coordinates %d { level = 1 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %d22 = sparse_tensor.coordinates %d { level = 2 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %e20 = sparse_tensor.coordinates %e { level = 0 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %e21 = sparse_tensor.coordinates %e { level = 1 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %e22 = sparse_tensor.coordinates %e { level = 2 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %f20 = sparse_tensor.coordinates %f { level = 0 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %f21 = sparse_tensor.coordinates %f { level = 1 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-    %f22 = sparse_tensor.coordinates %f { level = 2 : index } : tensor<2x3x4xf64, #Tensor2> to memref<?xindex>
-
-    %g30 = sparse_tensor.coordinates %g { level = 0 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-    %g31 = sparse_tensor.coordinates %g { level = 1 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-    %g32 = sparse_tensor.coordinates %g { level = 2 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-    %h30 = sparse_tensor.coordinates %h { level = 0 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-    %h31 = sparse_tensor.coordinates %h { level = 1 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-    %h32 = sparse_tensor.coordinates %h { level = 2 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-    %i30 = sparse_tensor.coordinates %i { level = 0 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-    %i31 = sparse_tensor.coordinates %i { level = 1 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-    %i32 = sparse_tensor.coordinates %i { level = 2 : index } : tensor<2x3x4xf64, #Tensor3> to memref<?xindex>
-
-    call @dumpidx(%v10) : (memref<?xindex>) -> ()
-    call @dumpidx(%v11) : (memref<?xindex>) -> ()
-    call @dumpidx(%v12) : (memref<?xindex>) -> ()
-    call @dumpidx(%v20) : (memref<?xindex>) -> ()
-    call @dumpidx(%v21) : (memref<?xindex>) -> ()
-    call @dumpidx(%v22) : (memref<?xindex>) -> ()
-    call @dumpidx(%v30) : (memref<?xindex>) -> ()
-    call @dumpidx(%v31) : (memref<?xindex>) -> ()
-    call @dumpidx(%v32) : (memref<?xindex>) -> ()
-
-    call @dumpidx(%a10) : (memref<?xindex>) -> ()
-    call @dumpidx(%a11) : (memref<?xindex>) -> ()
-    call @dumpidx(%a12) : (memref<?xindex>) -> ()
-    call @dumpidx(%b10) : (memref<?xindex>) -> ()
-    call @dumpidx(%b11) : (memref<?xindex>) -> ()
-    call @dumpidx(%b12) : (memref<?xindex>) -> ()
-    call @dumpidx(%c10) : (memref<?xindex>) -> ()
-    call @dumpidx(%c11) : (memref<?xindex>) -> ()
-    call @dumpidx(%c12) : (memref<?xindex>) -> ()
-
-    call @dumpidx(%d20) : (memref<?xindex>) -> ()
-    call @dumpidx(%d21) : (memref<?xindex>) -> ()
-    call @dumpidx(%d22) : (memref<?xindex>) -> ()
-    call @dumpidx(%e20) : (memref<?xindex>) -> ()
-    call @dumpidx(%e21) : (memref<?xindex>) -> ()
-    call @dumpidx(%e22) : (memref<?xindex>) -> ()
-    call @dumpidx(%f20) : (memref<?xindex>) -> ()
-    call @dumpidx(%f21) : (memref<?xindex>) -> ()
-    call @dumpidx(%f22) : (memref<?xindex>) -> ()
-
-    call @dumpidx(%g30) : (memref<?xindex>) -> ()
-    call @dumpidx(%g31) : (memref<?xindex>) -> ()
-    call @dumpidx(%g32) : (memref<?xindex>) -> ()
-    call @dumpidx(%h30) : (memref<?xindex>) -> ()
-    call @dumpidx(%h31) : (memref<?xindex>) -> ()
-    call @dumpidx(%h32) : (memref<?xindex>) -> ()
-    call @dumpidx(%i30) : (memref<?xindex>) -> ()
-    call @dumpidx(%i31) : (memref<?xindex>) -> ()
-    call @dumpidx(%i32) : (memref<?xindex>) -> ()
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 3, 4, 2 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: pos[2] : ( 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: values : ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 3, 4, 2 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: pos[2] : ( 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: values : ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 3, 4, 2 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: pos[2] : ( 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: values : ( 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23, 12, 24
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 4, 2, 3 )
+    // CHECK-NEXT: pos[0] : ( 0, 4
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6, 8
+    // CHECK-NEXT: crd[1] : ( 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: pos[2] : ( 0, 3, 6, 9, 12, 15, 18, 21, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2
+    // CHECK-NEXT: values : ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 4, 2, 3 )
+    // CHECK-NEXT: pos[0] : ( 0, 4
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6, 8
+    // CHECK-NEXT: crd[1] : ( 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: pos[2] : ( 0, 3, 6, 9, 12, 15, 18, 21, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2
+    // CHECK-NEXT: values : ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 24
+    // CHECK-NEXT: dim = ( 2, 3, 4 )
+    // CHECK-NEXT: lvl = ( 4, 2, 3 )
+    // CHECK-NEXT: pos[0] : ( 0, 4
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6, 8
+    // CHECK-NEXT: crd[1] : ( 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: pos[2] : ( 0, 3, 6, 9, 12, 15, 18, 21, 24
+    // CHECK-NEXT: crd[2] : ( 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2
+    // CHECK-NEXT: values : ( 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %1 : tensor<2x3x4xf64, #Tensor1>
+    sparse_tensor.print %2 : tensor<2x3x4xf64, #Tensor2>
+    sparse_tensor.print %3 : tensor<2x3x4xf64, #Tensor3>
+    sparse_tensor.print %a : tensor<2x3x4xf64, #Tensor1>
+    sparse_tensor.print %b : tensor<2x3x4xf64, #Tensor1>
+    sparse_tensor.print %c : tensor<2x3x4xf64, #Tensor1>
+    sparse_tensor.print %d : tensor<2x3x4xf64, #Tensor2>
+    sparse_tensor.print %e : tensor<2x3x4xf64, #Tensor2>
+    sparse_tensor.print %f : tensor<2x3x4xf64, #Tensor2>
+    sparse_tensor.print %g : tensor<2x3x4xf64, #Tensor3>
+    sparse_tensor.print %h : tensor<2x3x4xf64, #Tensor3>
+    sparse_tensor.print %i : tensor<2x3x4xf64, #Tensor3>
 
     // Release the resources.
     bufferization.dealloc_tensor %1 : tensor<2x3x4xf64, #Tensor1>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_block.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_block.mlir
index 809414ba977d2b..ff22283f43a793 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_block.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_block.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -52,21 +52,10 @@
 // Integration test that tests conversions between sparse tensors.
 //
 module {
-  //
-  // Output utilities.
-  //
-  func.func @dumpf64(%arg0: memref<?xf64>) {
-    %c0 = arith.constant 0 : index
-    %d0 = arith.constant -1.0 : f64
-    %0 = vector.transfer_read %arg0[%c0], %d0: memref<?xf64>, vector<8xf64>
-    vector.print %0 : vector<8xf64>
-    return
-  }
-
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     %c0 = arith.constant 0 : index
     %c1 = arith.constant 1 : index
     %c2 = arith.constant 2 : index
@@ -88,20 +77,47 @@ module {
     %3 = sparse_tensor.convert %1 : tensor<2x4xf64, #BSR> to tensor<2x4xf64, #CSR>
     %4 = sparse_tensor.convert %1 : tensor<2x4xf64, #BSR> to tensor<2x4xf64, #CSC>
 
-    %v1 = sparse_tensor.values %1 : tensor<2x4xf64, #BSR> to memref<?xf64>
-    %v2 = sparse_tensor.values %2 : tensor<2x4xf64, #BSR> to memref<?xf64>
-    %v3 = sparse_tensor.values %3 : tensor<2x4xf64, #CSR> to memref<?xf64>
-    %v4 = sparse_tensor.values %4 : tensor<2x4xf64, #CSC> to memref<?xf64>
-
-
-    // CHECK:      ( 1, 2, 5, 6, 3, 4, 7, 8 )
-    // CHECK-NEXT: ( 1, 2, 5, 6, 3, 4, 7, 8 )
-    // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8 )
-    // CHECK-NEXT: ( 1, 5, 2, 6, 3, 7, 4, 8 )
-    call @dumpf64(%v1) : (memref<?xf64>) -> ()
-    call @dumpf64(%v2) : (memref<?xf64>) -> ()
-    call @dumpf64(%v3) : (memref<?xf64>) -> ()
-    call @dumpf64(%v4) : (memref<?xf64>) -> ()
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 8
+    // CHECK-NEXT: dim = ( 2, 4 )
+    // CHECK-NEXT: lvl = ( 1, 2, 2, 2 )
+    // CHECK-NEXT: pos[1] : ( 0, 2
+    // CHECK-NEXT: crd[1] : ( 0, 1
+    // CHECK-NEXT: values : ( 1, 2, 5, 6, 3, 4, 7, 8
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 8
+    // CHECK-NEXT: dim = ( 2, 4 )
+    // CHECK-NEXT: lvl = ( 1, 2, 2, 2 )
+    // CHECK-NEXT: pos[1] : ( 0, 2
+    // CHECK-NEXT: crd[1] : ( 0, 1
+    // CHECK-NEXT: values : ( 1, 2, 5, 6, 3, 4, 7, 8
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 8
+    // CHECK-NEXT: dim = ( 2, 4 )
+    // CHECK-NEXT: lvl = ( 2, 4 )
+    // CHECK-NEXT: pos[1] : ( 0, 4, 8
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7, 8
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 8
+    // CHECK-NEXT: dim = ( 2, 4 )
+    // CHECK-NEXT: lvl = ( 4, 2 )
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6, 8
+    // CHECK-NEXT: crd[1] : ( 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: values : ( 1, 5, 2, 6, 3, 7, 4, 8
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %1 : tensor<2x4xf64, #BSR>
+    sparse_tensor.print %2 : tensor<2x4xf64, #BSR>
+    sparse_tensor.print %3 : tensor<2x4xf64, #CSR>
+    sparse_tensor.print %4 : tensor<2x4xf64, #CSC>
 
     // TODO: Fix memory leaks.
     bufferization.dealloc_tensor %1 : tensor<2x4xf64, #BSR>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_dyn.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_dyn.mlir
index f658457fa67356..11baf65e635091 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_dyn.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_dyn.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -44,19 +44,7 @@
 // may change (the actual underlying sizes obviously never change).
 //
 module {
-
-  func.func private @printMemref1dF64(%ptr : memref<?xf64>) attributes { llvm.emit_c_interface }
-
-  //
-  // Helper method to print values array. The transfer actually
-  // reads more than required to verify size of buffer as well.
-  //
-  func.func @dump(%arg0: memref<?xf64>) {
-    call @printMemref1dF64(%arg0) : (memref<?xf64>) -> ()
-    return
-  }
-
-  func.func @entry() {
+  func.func @main() {
     %t1 = arith.constant sparse<
       [ [0,0], [0,1], [0,63], [1,0], [1,1], [31,0], [31,63] ],
         [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 ]> : tensor<32x64xf64>
@@ -72,45 +60,81 @@ module {
     %5 = sparse_tensor.convert %3 : tensor<?x?xf64, #DCSR> to tensor<?x?xf64, #DCSC>
     %6 = sparse_tensor.convert %4 : tensor<?x?xf64, #DCSC> to tensor<?x?xf64, #DCSR>
 
-//
-    // Check number_of_entries.
     //
-    // CHECK-COUNT-6: 7
-    %n1 = sparse_tensor.number_of_entries %1 : tensor<?x?xf64, #DCSR>
-    %n2 = sparse_tensor.number_of_entries %2 : tensor<?x?xf64, #DCSC>
-    %n3 = sparse_tensor.number_of_entries %3 : tensor<?x?xf64, #DCSR>
-    %n4 = sparse_tensor.number_of_entries %4 : tensor<?x?xf64, #DCSC>
-    %n5 = sparse_tensor.number_of_entries %5 : tensor<?x?xf64, #DCSC>
-    %n6 = sparse_tensor.number_of_entries %6 : tensor<?x?xf64, #DCSR>
-    vector.print %n1 : index
-    vector.print %n2 : index
-    vector.print %n3 : index
-    vector.print %n4 : index
-    vector.print %n5 : index
-    vector.print %n6 : index
-
+    // Verify the outputs.
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 32, 64 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 31
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 63, 0, 1, 0, 63
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 64, 32 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 63
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 31, 0, 1, 0, 31
+    // CHECK-NEXT: values : ( 1, 4, 6, 2, 5, 3, 7
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 32, 64 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 31
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 63, 0, 1, 0, 63
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 64, 32 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 63
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 31, 0, 1, 0, 31
+    // CHECK-NEXT: values : ( 1, 4, 6, 2, 5, 3, 7
+    // CHECK-NEXT: ----
     //
-    // All proper row-/column-wise?
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 64, 32 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 63
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 31, 0, 1, 0, 31
+    // CHECK-NEXT: values : ( 1, 4, 6, 2, 5, 3, 7
+    // CHECK-NEXT: ----
     //
-    // CHECK: [1,  2,  3,  4,  5,  6,  7
-    // CHECK: [1,  4,  6,  2,  5,  3,  7
-    // CHECK: [1,  2,  3,  4,  5,  6,  7
-    // CHECK: [1,  4,  6,  2,  5,  3,  7
-    // CHECK: [1,  4,  6,  2,  5,  3,  7
-    // CHECK: [1,  2,  3,  4,  5,  6,  7
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 32, 64 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 31
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 63, 0, 1, 0, 63
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7
+    // CHECK-NEXT: ----
     //
-    %m1 = sparse_tensor.values %1 : tensor<?x?xf64, #DCSR> to memref<?xf64>
-    %m2 = sparse_tensor.values %2 : tensor<?x?xf64, #DCSC> to memref<?xf64>
-    %m3 = sparse_tensor.values %3 : tensor<?x?xf64, #DCSR> to memref<?xf64>
-    %m4 = sparse_tensor.values %4 : tensor<?x?xf64, #DCSC> to memref<?xf64>
-    %m5 = sparse_tensor.values %5 : tensor<?x?xf64, #DCSC> to memref<?xf64>
-    %m6 = sparse_tensor.values %6 : tensor<?x?xf64, #DCSR> to memref<?xf64>
-    call @dump(%m1) : (memref<?xf64>) -> ()
-    call @dump(%m2) : (memref<?xf64>) -> ()
-    call @dump(%m3) : (memref<?xf64>) -> ()
-    call @dump(%m4) : (memref<?xf64>) -> ()
-    call @dump(%m5) : (memref<?xf64>) -> ()
-    call @dump(%m6) : (memref<?xf64>) -> ()
+    sparse_tensor.print %1 : tensor<?x?xf64, #DCSR>
+    sparse_tensor.print %2 : tensor<?x?xf64, #DCSC>
+    sparse_tensor.print %3 : tensor<?x?xf64, #DCSR>
+    sparse_tensor.print %4 : tensor<?x?xf64, #DCSC>
+    sparse_tensor.print %5 : tensor<?x?xf64, #DCSC>
+    sparse_tensor.print %6 : tensor<?x?xf64, #DCSR>
 
     // Release the resources.
     bufferization.dealloc_tensor %1 : tensor<?x?xf64, #DCSR>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_element.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_element.mlir
index 81f68366be28c6..a2ec6df392aaa3 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_element.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_element.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -55,7 +55,7 @@ module {
   //
   // The first test suite (for non-singleton LevelTypes).
   //
-  func.func @entry() {
+  func.func @main() {
     //
     // Initialize a 3-dim dense tensor.
     //
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_ptr.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_ptr.mlir
index 3ebe3be757d2d0..6005aa6cfeaed6 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_ptr.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_ptr.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -54,41 +54,7 @@
 // in addition to layout.
 //
 module {
-
-  //
-  // Helper method to print values and indices arrays. The transfer actually
-  // reads more than required to verify size of buffer as well.
-  //
-  func.func @dumpf64(%arg0: memref<?xf64>) {
-    %c = arith.constant 0 : index
-    %d = arith.constant 0.0 : f64
-    %0 = vector.transfer_read %arg0[%c], %d: memref<?xf64>, vector<8xf64>
-    vector.print %0 : vector<8xf64>
-    return
-  }
-  func.func @dumpi08(%arg0: memref<?xi8>) {
-    %c = arith.constant 0 : index
-    %d = arith.constant 0 : i8
-    %0 = vector.transfer_read %arg0[%c], %d: memref<?xi8>, vector<8xi8>
-    vector.print %0 : vector<8xi8>
-    return
-  }
-  func.func @dumpi32(%arg0: memref<?xi32>) {
-    %c = arith.constant 0 : index
-    %d = arith.constant 0 : i32
-    %0 = vector.transfer_read %arg0[%c], %d: memref<?xi32>, vector<8xi32>
-    vector.print %0 : vector<8xi32>
-    return
-  }
-  func.func @dumpi64(%arg0: memref<?xi64>) {
-    %c = arith.constant 0 : index
-    %d = arith.constant 0 : i64
-    %0 = vector.transfer_read %arg0[%c], %d: memref<?xi64>, vector<8xi64>
-    vector.print %0 : vector<8xi64>
-    return
-  }
-
-  func.func @entry() {
+  func.func @main() {
     %c1 = arith.constant 1 : index
     %t1 = arith.constant sparse<
       [ [0,0], [0,1], [0,63], [1,0], [1,1], [31,0], [31,63] ],
@@ -106,50 +72,78 @@ module {
     %6 = sparse_tensor.convert %3 : tensor<32x64xf64, #CSC>  to tensor<32x64xf64, #DCSR>
 
     //
-    // All proper row-/column-wise?
+    // Verify the outputs.
     //
-    // CHECK:      ( 1, 2, 3, 4, 5, 6, 7, 0 )
-    // CHECK-NEXT: ( 1, 4, 6, 2, 5, 3, 7, 0 )
-    // CHECK-NEXT: ( 1, 4, 6, 2, 5, 3, 7, 0 )
-    // CHECK-NEXT: ( 1, 4, 6, 2, 5, 3, 7, 0 )
-    // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 0 )
-    // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 0 )
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 32, 64 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 31
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 63, 0, 1, 0, 63
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7
+    // CHECK-NEXT: ----
     //
-    %m1 = sparse_tensor.values %1 : tensor<32x64xf64, #DCSR> to memref<?xf64>
-    %m2 = sparse_tensor.values %2 : tensor<32x64xf64, #DCSC> to memref<?xf64>
-    %m3 = sparse_tensor.values %3 : tensor<32x64xf64, #CSC>  to memref<?xf64>
-    %m4 = sparse_tensor.values %4 : tensor<32x64xf64, #DCSC> to memref<?xf64>
-    %m5 = sparse_tensor.values %5 : tensor<32x64xf64, #DCSR> to memref<?xf64>
-    %m6 = sparse_tensor.values %6 : tensor<32x64xf64, #DCSR> to memref<?xf64>
-    call @dumpf64(%m1) : (memref<?xf64>) -> ()
-    call @dumpf64(%m2) : (memref<?xf64>) -> ()
-    call @dumpf64(%m3) : (memref<?xf64>) -> ()
-    call @dumpf64(%m4) : (memref<?xf64>) -> ()
-    call @dumpf64(%m5) : (memref<?xf64>) -> ()
-    call @dumpf64(%m6) : (memref<?xf64>) -> ()
-
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 64, 32 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 63
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 31, 0, 1, 0, 31
+    // CHECK-NEXT: values : ( 1, 4, 6, 2, 5, 3, 7
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 64, 32 )
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 31, 0, 1, 0, 31
+    // CHECK-NEXT: values : ( 1, 4, 6, 2, 5, 3, 7
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 64, 32 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 63
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 31, 0, 1, 0, 31
+    // CHECK-NEXT: values : ( 1, 4, 6, 2, 5, 3, 7
+    // CHECK-NEXT: ----
     //
-    // Sanity check on indices.
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 32, 64 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 31
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 63, 0, 1, 0, 63
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7
+    // CHECK-NEXT: ----
     //
-    // CHECK-NEXT: ( 0, 1, 63, 0, 1, 0, 63, 0 )
-    // CHECK-NEXT: ( 0, 1, 31, 0, 1, 0, 31, 0 )
-    // CHECK-NEXT: ( 0, 1, 31, 0, 1, 0, 31, 0 )
-    // CHECK-NEXT: ( 0, 1, 31, 0, 1, 0, 31, 0 )
-    // CHECK-NEXT: ( 0, 1, 63, 0, 1, 0, 63, 0 )
-    // CHECK-NEXT: ( 0, 1, 63, 0, 1, 0, 63, 0 )
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 7
+    // CHECK-NEXT: dim = ( 32, 64 )
+    // CHECK-NEXT: lvl = ( 32, 64 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 31
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 63, 0, 1, 0, 63
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5, 6, 7
+    // CHECK-NEXT: ----
     //
-    %i1 = sparse_tensor.coordinates %1 { level = 1 : index } : tensor<32x64xf64, #DCSR> to memref<?xi8>
-    %i2 = sparse_tensor.coordinates %2 { level = 1 : index } : tensor<32x64xf64, #DCSC> to memref<?xi64>
-    %i3 = sparse_tensor.coordinates %3 { level = 1 : index } : tensor<32x64xf64, #CSC>  to memref<?xi32>
-    %i4 = sparse_tensor.coordinates %4 { level = 1 : index } : tensor<32x64xf64, #DCSC> to memref<?xi64>
-    %i5 = sparse_tensor.coordinates %5 { level = 1 : index } : tensor<32x64xf64, #DCSR> to memref<?xi8>
-    %i6 = sparse_tensor.coordinates %6 { level = 1 : index } : tensor<32x64xf64, #DCSR> to memref<?xi8>
-    call @dumpi08(%i1) : (memref<?xi8>)  -> ()
-    call @dumpi64(%i2) : (memref<?xi64>) -> ()
-    call @dumpi32(%i3) : (memref<?xi32>) -> ()
-    call @dumpi64(%i4) : (memref<?xi64>) -> ()
-    call @dumpi08(%i5) : (memref<?xi08>) -> ()
-    call @dumpi08(%i6) : (memref<?xi08>) -> ()
+    sparse_tensor.print %1 : tensor<32x64xf64, #DCSR>
+    sparse_tensor.print %2 : tensor<32x64xf64, #DCSC>
+    sparse_tensor.print %3 : tensor<32x64xf64, #CSC>
+    sparse_tensor.print %4 : tensor<32x64xf64, #DCSC>
+    sparse_tensor.print %5 : tensor<32x64xf64, #DCSR>
+    sparse_tensor.print %6 : tensor<32x64xf64, #DCSR>
 
     // Release the resources.
     bufferization.dealloc_tensor %1 : tensor<32x64xf64, #DCSR>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
index 1655d6a03a6202..9b05f9bf3a29c2 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -107,7 +107,7 @@ module {
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     //
     // Initialize a 3-dim dense tensor.
     //
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir
index 2ace317554a070..0f9dfb9da7204f 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2sparse.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -180,7 +180,7 @@ module {
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     call @testNonSingleton() : () -> ()
     call @testSingleton() : () -> ()
     return
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
index 16252c1005ebbb..ba3636b83c8be3 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -93,17 +93,17 @@ module {
 
   func.func @add_coo_coo_out_coo(%arga: tensor<8x8xf32, #SortedCOO>,
                                  %argb: tensor<8x8xf32, #SortedCOOSoA>)
-		                 -> tensor<8x8xf32, #SortedCOO> {
-    %init = tensor.empty() : tensor<8x8xf32, #SortedCOO>
+		                 -> tensor<8x8xf32, #SortedCOOSoA> {
+    %init = tensor.empty() : tensor<8x8xf32, #SortedCOOSoA>
     %0 = linalg.generic #trait
       ins(%arga, %argb: tensor<8x8xf32, #SortedCOO>,
                         tensor<8x8xf32, #SortedCOOSoA>)
-      outs(%init: tensor<8x8xf32, #SortedCOO>) {
+      outs(%init: tensor<8x8xf32, #SortedCOOSoA>) {
         ^bb(%a: f32, %b: f32, %x: f32):
           %0 = arith.addf %a, %b : f32
           linalg.yield %0 : f32
-        } -> tensor<8x8xf32, #SortedCOO>
-    return %0 : tensor<8x8xf32, #SortedCOO>
+        } -> tensor<8x8xf32, #SortedCOOSoA>
+    return %0 : tensor<8x8xf32, #SortedCOOSoA>
   }
 
 
@@ -126,7 +126,7 @@ module {
     return %0 : tensor<8x8xf32>
   }
 
-  func.func @entry() {
+  func.func @main() {
     %c0  = arith.constant 0 : index
     %c1  = arith.constant 1 : index
     %c8  = arith.constant 8 : index
@@ -171,19 +171,19 @@ module {
                                             -> tensor<8x8xf32>
     %COO_RET = call @add_coo_coo_out_coo(%COO_A, %COO_B) : (tensor<8x8xf32, #SortedCOO>,
                                                             tensor<8x8xf32, #SortedCOOSoA>)
-                                                         -> tensor<8x8xf32, #SortedCOO>
-    %C4 = sparse_tensor.convert %COO_RET : tensor<8x8xf32, #SortedCOO> to tensor<8x8xf32>
+                                                         -> tensor<8x8xf32, #SortedCOOSoA>
+
     //
     // Verify computed matrix C.
     //
-    // CHECK-COUNT-4:      ( 8.8, 4.8, 6.8, 4.8, 8.8, 6.1, 14.8, 16.8 )
-    // CHECK-NEXT-COUNT-4: ( 4.4, 4.4, 4.4, 8.4, 8.4, 12.4, 16.4, 16.4 )
-    // CHECK-NEXT-COUNT-4: ( 8.8, 4.8, 6.8, 8.8, 8.8, 12.8, 14.8, 15.8 )
-    // CHECK-NEXT-COUNT-4: ( 4.3, 5.3, 6.3, 8.3, 8.3, 12.3, 14.3, 16.3 )
-    // CHECK-NEXT-COUNT-4: ( 4.5, 4.5, 6.5, 8.5, 8.5, 12.5, 14.5, 16.5 )
-    // CHECK-NEXT-COUNT-4: ( 9.9, 4.9, 6.9, 8.9, 8.9, 12.9, 15.9, 16.9 )
-    // CHECK-NEXT-COUNT-4: ( 12.1, 6.1, 5.1, 9.1, 9.1, 13.1, 15.1, 17.1 )
-    // CHECK-NEXT-COUNT-4: ( 15.4, 5.4, 7.4, 5.4, 11.4, 10.4, 11.4, 9.4 )
+    // CHECK-COUNT-3:      ( 8.8, 4.8, 6.8, 4.8, 8.8, 6.1, 14.8, 16.8 )
+    // CHECK-NEXT-COUNT-3: ( 4.4, 4.4, 4.4, 8.4, 8.4, 12.4, 16.4, 16.4 )
+    // CHECK-NEXT-COUNT-3: ( 8.8, 4.8, 6.8, 8.8, 8.8, 12.8, 14.8, 15.8 )
+    // CHECK-NEXT-COUNT-3: ( 4.3, 5.3, 6.3, 8.3, 8.3, 12.3, 14.3, 16.3 )
+    // CHECK-NEXT-COUNT-3: ( 4.5, 4.5, 6.5, 8.5, 8.5, 12.5, 14.5, 16.5 )
+    // CHECK-NEXT-COUNT-3: ( 9.9, 4.9, 6.9, 8.9, 8.9, 12.9, 15.9, 16.9 )
+    // CHECK-NEXT-COUNT-3: ( 12.1, 6.1, 5.1, 9.1, 9.1, 13.1, 15.1, 17.1 )
+    // CHECK-NEXT-COUNT-3: ( 15.4, 5.4, 7.4, 5.4, 11.4, 10.4, 11.4, 9.4 )
     //
     %f0  = arith.constant 0.0 : f32
     scf.for %i = %c0 to %c8 step %c1 {
@@ -193,23 +193,45 @@ module {
         : tensor<8x8xf32>, vector<8xf32>
       %v3 = vector.transfer_read %C3[%i, %c0], %f0
         : tensor<8x8xf32>, vector<8xf32>
-      %v4 = vector.transfer_read %C4[%i, %c0], %f0
-        : tensor<8x8xf32>, vector<8xf32>
       vector.print %v1 : vector<8xf32>
       vector.print %v2 : vector<8xf32>
       vector.print %v3 : vector<8xf32>
-      vector.print %v4 : vector<8xf32>
     }
 
+    //
+    // Ensure that COO-SoA output has the same values.
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 64
+    // CHECK-NEXT: dim = ( 8, 8 )
+    // CHECK-NEXT: lvl = ( 8, 8 )
+    // CHECK-NEXT: pos[0] : ( 0, 64
+    // CHECK-NEXT: crd[0] : ( 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
+    // CHECK-SAME:            2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+    // CHECK-SAME:            5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7,
+    // CHECK-SAME:            7, 7, 7, 7
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
+    // CHECK-SAME:            4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
+    // CHECK-SAME:            0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3,
+    // CHECK-SAME:            4, 5, 6, 7
+    // CHECK-NEXT: values : ( 8.8, 4.8, 6.8, 4.8, 8.8, 6.1, 14.8, 16.8, 4.4, 4.4, 4.4, 8.4,
+    // CHECK-SAME:            8.4, 12.4, 16.4, 16.4, 8.8, 4.8, 6.8, 8.8, 8.8, 12.8, 14.8,
+    // CHECK-SAME:            15.8, 4.3, 5.3, 6.3, 8.3, 8.3, 12.3, 14.3, 16.3, 4.5, 4.5,
+    // CHECK-SAME:            6.5, 8.5, 8.5, 12.5, 14.5, 16.5, 9.9, 4.9, 6.9, 8.9, 8.9,
+    // CHECK-SAME:            12.9, 15.9, 16.9, 12.1, 6.1, 5.1, 9.1, 9.1, 13.1, 15.1, 17.1,
+    // CHECK-SAME:            15.4, 5.4, 7.4, 5.4, 11.4, 10.4, 11.4, 9.4
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %COO_RET : tensor<8x8xf32, #SortedCOOSoA>
+
     // Release resources.
     bufferization.dealloc_tensor %C1 : tensor<8x8xf32>
     bufferization.dealloc_tensor %C2 : tensor<8x8xf32>
     bufferization.dealloc_tensor %C3 : tensor<8x8xf32>
-    bufferization.dealloc_tensor %C4 : tensor<8x8xf32>
     bufferization.dealloc_tensor %CSR_A : tensor<8x8xf32, #CSR>
     bufferization.dealloc_tensor %COO_A : tensor<8x8xf32, #SortedCOO>
     bufferization.dealloc_tensor %COO_B : tensor<8x8xf32, #SortedCOOSoA>
-    bufferization.dealloc_tensor %COO_RET : tensor<8x8xf32, #SortedCOO>
+    bufferization.dealloc_tensor %COO_RET : tensor<8x8xf32, #SortedCOOSoA>
 
 
     return
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dilated_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dilated_conv_2d_nhwc_hwcf.mlir
index b4d40ae084015a..40738a9f7d7f19 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dilated_conv_2d_nhwc_hwcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dilated_conv_2d_nhwc_hwcf.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -78,7 +78,7 @@ func.func @conv_2d_nhwc_hwcf_dual_CDCC(%arg0: tensor<?x?x?x?xf32, #CDCC>, %arg1:
 }
 
 
-func.func @entry() {
+func.func @main() {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %c3 = arith.constant 3 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
index f7ba4daa245892..5451f2d957ad35 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -49,7 +49,7 @@ module {
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     // Setup two sparse vectors.
     %d1 = arith.constant sparse<
         [ [0], [1], [22], [23], [1022] ], [1.0, 2.0, 3.0, 4.0, 5.0]
@@ -60,6 +60,30 @@ module {
     %s1 = sparse_tensor.convert %d1 : tensor<1024xf32> to tensor<1024xf32, #SparseVector>
     %s2 = sparse_tensor.convert %d2 : tensor<1024xf32> to tensor<1024xf32, #SparseVector>
 
+    //
+    // Verify the inputs.
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 5
+    // CHECK-NEXT: dim = ( 1024 )
+    // CHECK-NEXT: lvl = ( 1024 )
+    // CHECK-NEXT: pos[0] : ( 0, 5
+    // CHECK-NEXT: crd[0] : ( 0, 1, 22, 23, 1022
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 5
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 3
+    // CHECK-NEXT: dim = ( 1024 )
+    // CHECK-NEXT: lvl = ( 1024 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 22, 1022, 1023
+    // CHECK-NEXT: values : ( 6, 7, 8
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %s1 : tensor<1024xf32, #SparseVector>
+    sparse_tensor.print %s2 : tensor<1024xf32, #SparseVector>
+
     // Call the kernel and verify the output.
     //
     // CHECK: 53
@@ -73,16 +97,6 @@ module {
     %1 = tensor.extract %0[] : tensor<f32>
     vector.print %1 : f32
 
-    // Print number of entries in the sparse vectors.
-    //
-    // CHECK: 5
-    // CHECK: 3
-    //
-    %noe1 = sparse_tensor.number_of_entries %s1 : tensor<1024xf32, #SparseVector>
-    %noe2 = sparse_tensor.number_of_entries %s2 : tensor<1024xf32, #SparseVector>
-    vector.print %noe1 : index
-    vector.print %noe2 : index
-
     // Release the resources.
     bufferization.dealloc_tensor %0 : tensor<f32>
     bufferization.dealloc_tensor %s1 : tensor<1024xf32, #SparseVector>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
index 93a7ea51ec9c4d..451195b2185b76 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -35,8 +35,6 @@
 }>
 
 module {
-  func.func private @printMemrefF64(%ptr : tensor<*xf64>)
-
   //
   // Column-wise storage forces the ijk loop to permute into jki
   // so that access pattern expansion (workspace) needs to be
@@ -54,7 +52,7 @@ module {
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     %c0 = arith.constant 0 : index
     %d1 = arith.constant -1.0 : f64
 
@@ -83,24 +81,26 @@ module {
        : (tensor<8x2xf64, #CSC>,
           tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
 
-    // CHECK:      {{\[}}[32.53,   35.73,   38.93,   42.13],
-    // CHECK-NEXT: [34.56,   37.96,   41.36,   44.76],
-    // CHECK-NEXT: [36.59,   40.19,   43.79,   47.39],
-    // CHECK-NEXT: [38.62,   42.42,   46.22,   50.02],
-    // CHECK-NEXT: [40.65,   44.65,   48.65,   52.65],
-    // CHECK-NEXT: [42.68,   46.88,   51.08,   55.28],
-    // CHECK-NEXT: [44.71,   49.11,   53.51,   57.91],
-    // CHECK-NEXT: [46.74,   51.34,   55.94,   60.54]]
     //
-    %xc = sparse_tensor.convert %x3 : tensor<8x4xf64, #CSC> to tensor<8x4xf64>
-    %xu = tensor.cast %xc : tensor<8x4xf64> to tensor<*xf64>
-    call @printMemrefF64(%xu) : (tensor<*xf64>) -> ()
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 32
+    // CHECK-NEXT: dim = ( 8, 4 )
+    // CHECK-NEXT: lvl = ( 4, 8 )
+    // CHECK-NEXT: pos[1] : ( 0, 8, 16, 24, 32
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0,
+    // CHECK-SAME:            1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
+    // CHECK-NEXT: values : ( 32.53, 34.56, 36.59, 38.62, 40.65, 42.68, 44.71, 46.74,
+    // CHECK-SAME:            35.73, 37.96, 40.19, 42.42, 44.65, 46.88, 49.11, 51.34,
+    // CHECK-SAME:            38.93, 41.36, 43.79, 46.22, 48.65, 51.08, 53.51, 55.94,
+    // CHECK-SAME:            42.13, 44.76, 47.39, 50.02, 52.65, 55.28, 57.91, 60.54
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %x3 : tensor<8x4xf64, #CSC>
 
     // Release the resources.
     bufferization.dealloc_tensor %x1 : tensor<8x2xf64, #CSC>
     bufferization.dealloc_tensor %x2 : tensor<2x4xf64, #CSC>
     bufferization.dealloc_tensor %x3 : tensor<8x4xf64, #CSC>
-    bufferization.dealloc_tensor %xc : tensor<8x4xf64>
 
     return
   }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand_shape.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand_shape.mlir
index c784375e0f3eaf..6679a81c74088b 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand_shape.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand_shape.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -115,7 +115,7 @@ module {
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     %c0 = arith.constant 0 : index
     %df = arith.constant -1.0 : f64
 
@@ -147,60 +147,111 @@ module {
     %expand11 = call @expand_sparse2sparse_dyn(%sdm) : (tensor<?x?xf64, #SparseMatrix>) -> tensor<?x2x?xf64, #Sparse3dTensor>
 
     //
-    // Verify results of expand
+    // Verify results of expand with dense output.
     //
     // CHECK:      ( ( 1, 0, 3, 0 ), ( 5, 0, 7, 0 ), ( 9, 0, 11, 0 ) )
     // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 5, 0, 7, 0 ), ( 9, 0, 11, 0 ) )
-    // CHECK-NEXT: ( 1, 3, 5, 7, 9,
-    // CHECK-NEXT: ( 1, 3, 5, 7, 9,
     // CHECK-NEXT: ( ( ( 1.1, 1.2 ), ( 1.3, 1.4 ) ), ( ( 2.1, 2.2 ), ( 2.3, 2.4 ) ), ( ( 3.1, 3.2 ), ( 3.3, 3.4 ) ) )
     // CHECK-NEXT: ( ( ( 1.1, 1.2 ), ( 1.3, 1.4 ) ), ( ( 2.1, 2.2 ), ( 2.3, 2.4 ) ), ( ( 3.1, 3.2 ), ( 3.3, 3.4 ) ) )
-    // CHECK-NEXT: ( 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 3.3, 3.4 )
-    // CHECK-NEXT: ( 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 3.3, 3.4 )
     // CHECK-NEXT: ( ( ( 1.1, 1.2 ), ( 1.3, 1.4 ) ), ( ( 2.1, 2.2 ), ( 2.3, 2.4 ) ), ( ( 3.1, 3.2 ), ( 3.3, 3.4 ) ) )
     // CHECK-NEXT: ( ( ( 1.1, 1.2 ), ( 1.3, 1.4 ) ), ( ( 2.1, 2.2 ), ( 2.3, 2.4 ) ), ( ( 3.1, 3.2 ), ( 3.3, 3.4 ) ) )
-    // CHECK-NEXT: 12
-    // CHECK-NEXT: ( 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 3.3, 3.4 )
-    // CHECK-NEXT: 12
-    // CHECK-NEXT: ( 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 3.3, 3.4 )
     //
-
     %m0 = vector.transfer_read %expand0[%c0, %c0], %df: tensor<3x4xf64>, vector<3x4xf64>
     vector.print %m0 : vector<3x4xf64>
     %m1 = vector.transfer_read %expand1[%c0, %c0], %df: tensor<3x4xf64>, vector<3x4xf64>
     vector.print %m1 : vector<3x4xf64>
-    %a2 = sparse_tensor.values %expand2 : tensor<3x4xf64, #SparseMatrix> to memref<?xf64>
-    %m2 = vector.transfer_read %a2[%c0], %df: memref<?xf64>, vector<12xf64>
-    vector.print %m2 : vector<12xf64>
-    %a3 = sparse_tensor.values %expand3 : tensor<3x4xf64, #SparseMatrix> to memref<?xf64>
-    %m3 = vector.transfer_read %a3[%c0], %df: memref<?xf64>, vector<12xf64>
-    vector.print %m3 : vector<12xf64>
-
     %m4 = vector.transfer_read %expand4[%c0, %c0, %c0], %df: tensor<3x2x2xf64>, vector<3x2x2xf64>
     vector.print %m4 : vector<3x2x2xf64>
     %m5 = vector.transfer_read %expand5[%c0, %c0, %c0], %df: tensor<3x2x2xf64>, vector<3x2x2xf64>
     vector.print %m5 : vector<3x2x2xf64>
-    %a6 = sparse_tensor.values %expand6 : tensor<3x2x2xf64, #Sparse3dTensor> to memref<?xf64>
-    %m6 = vector.transfer_read %a6[%c0], %df: memref<?xf64>, vector<12xf64>
-    vector.print %m6 : vector<12xf64>
-    %a7 = sparse_tensor.values %expand7 : tensor<3x2x2xf64, #Sparse3dTensor> to memref<?xf64>
-    %m7 = vector.transfer_read %a7[%c0], %df: memref<?xf64>, vector<12xf64>
-    vector.print %m7 : vector<12xf64>
-
     %m8 = vector.transfer_read %expand8[%c0, %c0, %c0], %df: tensor<?x2x?xf64>, vector<3x2x2xf64>
     vector.print %m8 : vector<3x2x2xf64>
     %m9 = vector.transfer_read %expand9[%c0, %c0, %c0], %df: tensor<?x2x?xf64>, vector<3x2x2xf64>
     vector.print %m9 : vector<3x2x2xf64>
-    %n10 = sparse_tensor.number_of_entries %expand10 : tensor<?x2x?xf64, #Sparse3dTensor>
-    vector.print %n10 : index
-    %a10 = sparse_tensor.values %expand10 : tensor<?x2x?xf64, #Sparse3dTensor> to memref<?xf64>
-    %m10 = vector.transfer_read %a10[%c0], %df: memref<?xf64>, vector<12xf64>
-    vector.print %m10 : vector<12xf64>
-    %n11 = sparse_tensor.number_of_entries %expand11 : tensor<?x2x?xf64, #Sparse3dTensor>
-    vector.print %n11 : index
-    %a11 = sparse_tensor.values %expand11 : tensor<?x2x?xf64, #Sparse3dTensor> to memref<?xf64>
-    %m11 = vector.transfer_read %a11[%c0], %df: memref<?xf64>, vector<12xf64>
-    vector.print %m11 : vector<12xf64>
+
+    //
+    // Verify results of expand with sparse output.
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 6
+    // CHECK-NEXT: dim = ( 3, 4 )
+    // CHECK-NEXT: lvl = ( 3, 4 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6
+    // CHECK-NEXT: crd[1] : ( 0, 2, 0, 2, 0, 2
+    // CHECK-NEXT: values : ( 1, 3, 5, 7, 9, 11
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 6
+    // CHECK-NEXT: dim = ( 3, 4 )
+    // CHECK-NEXT: lvl = ( 3, 4 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6
+    // CHECK-NEXT: crd[1] : ( 0, 2, 0, 2, 0, 2
+    // CHECK-NEXT: values : ( 1, 3, 5, 7, 9, 11
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 12
+    // CHECK-NEXT: dim = ( 3, 2, 2 )
+    // CHECK-NEXT: lvl = ( 3, 2, 2 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6
+    // CHECK-NEXT: crd[1] : ( 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: pos[2] : ( 0, 2, 4, 6, 8, 10, 12
+    // CHECK-NEXT: crd[2] : ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: values : ( 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 3.3, 3.4
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 12
+    // CHECK-NEXT: dim = ( 3, 2, 2 )
+    // CHECK-NEXT: lvl = ( 3, 2, 2 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6
+    // CHECK-NEXT: crd[1] : ( 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: pos[2] : ( 0, 2, 4, 6, 8, 10, 12
+    // CHECK-NEXT: crd[2] : ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: values : ( 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 3.3, 3.4
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 12
+    // CHECK-NEXT: dim = ( 3, 2, 2 )
+    // CHECK-NEXT: lvl = ( 3, 2, 2 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6
+    // CHECK-NEXT: crd[1] : ( 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: pos[2] : ( 0, 2, 4, 6, 8, 10, 12
+    // CHECK-NEXT: crd[2] : ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: values : ( 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 3.3, 3.4
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 12
+    // CHECK-NEXT: dim = ( 3, 2, 2 )
+    // CHECK-NEXT: lvl = ( 3, 2, 2 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 6
+    // CHECK-NEXT: crd[1] : ( 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: pos[2] : ( 0, 2, 4, 6, 8, 10, 12
+    // CHECK-NEXT: crd[2] : ( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
+    // CHECK-NEXT: values : ( 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 3.3, 3.4
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %expand2 : tensor<3x4xf64, #SparseMatrix>
+    sparse_tensor.print %expand3 : tensor<3x4xf64, #SparseMatrix>
+    sparse_tensor.print %expand6 : tensor<3x2x2xf64, #Sparse3dTensor>
+    sparse_tensor.print %expand7 : tensor<3x2x2xf64, #Sparse3dTensor>
+    sparse_tensor.print %expand10 : tensor<?x2x?xf64, #Sparse3dTensor>
+    sparse_tensor.print %expand11 : tensor<?x2x?xf64, #Sparse3dTensor>
 
 
     // Release sparse resources.
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
index 7478b604ff6715..37ff2e3ffd3f46 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -53,7 +53,7 @@ module {
     return %0 : tensor<6x6xi32, #DCSR>
   }
 
-  func.func @entry() {
+  func.func @main() {
     %c0 = arith.constant 0 : index
     %i0 = arith.constant 0 : i32
 
@@ -100,25 +100,27 @@ module {
     vector.print %v : vector<6x6xi32>
 
     //
-    // Should be the same as dense output
-    // CHECK:    ( ( 0, 0, -1, -6, -1, 6 ),
-    // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
-    // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
-    // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
-    // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
-    // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
+    // Should be the same as dense output.
     //
-    %sparse_ret = sparse_tensor.convert %1
-      : tensor<6x6xi32, #DCSR> to tensor<6x6xi32>
-    %v1 = vector.transfer_read %sparse_ret[%c0, %c0], %i0
-      : tensor<6x6xi32>, vector<6x6xi32>
-    vector.print %v1 : vector<6x6xi32>
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 36
+    // CHECK-NEXT: dim = ( 6, 6 )
+    // CHECK-NEXT: lvl = ( 6, 6 )
+    // CHECK-NEXT: pos[0] : ( 0, 6
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5
+    // CHECK-NEXT: pos[1] : ( 0, 6, 12, 18, 24, 30, 36
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4,
+    // CHECK-SAME:            5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5
+    // CHECK-NEXT: values : ( 0, 0, -1, -6, -1, 6, -1, 0, 1, 0, 1, 0, 0, -1, 1,
+    // CHECK-SAME:            0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 3, 6, -3, -6, 2, -1, 3, 0, -3, 0
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %1 : tensor<6x6xi32, #DCSR>
 
     // Release the resources.
     bufferization.dealloc_tensor %sparse_filter : tensor<3x3xi32, #DCSR>
     bufferization.dealloc_tensor %0 : tensor<6x6xi32>
     bufferization.dealloc_tensor %1 : tensor<6x6xi32, #DCSR>
-    bufferization.dealloc_tensor %sparse_ret : tensor<6x6xi32>
 
     return
   }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
index 55ecac8dbc1874..8a5712d3fa1b10 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -82,7 +82,7 @@ module {
   //
   // Main driver that reads tensor from file and calls the sparse kernel.
   //
-  func.func @entry() {
+  func.func @main() {
     %d0 = arith.constant 0.0 : f64
     %c0 = arith.constant 0 : index
     %c1 = arith.constant 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir
index 46b83be21dcf30..aef3a947e4f0ba 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -100,7 +100,7 @@ module {
     return
   }
 
-  func.func @entry() {
+  func.func @main() {
     %c0 = arith.constant 0 : index
     %c1 = arith.constant 1 : index
     %c2 = arith.constant 2 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_generate.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_generate.mlir
index c1547033062d37..e1f73eb4ac4fe6 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_generate.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_generate.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -39,7 +39,7 @@ module {
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     %c0 = arith.constant 0 : index
     %c1 = arith.constant 1 : index
     %f0 = arith.constant 0.0 : f64
@@ -78,12 +78,20 @@ module {
     }
 
     %sv = sparse_tensor.convert %output : tensor<?xf64> to tensor<?xf64, #SparseVector>
-    %n0 = sparse_tensor.number_of_entries %sv : tensor<?xf64, #SparseVector>
 
-    // Print the number of non-zeros for verification.
     //
-    // CHECK: 5
-    vector.print %n0 : index
+    // Verify the outputs.
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 5
+    // CHECK-NEXT: dim = ( 50 )
+    // CHECK-NEXT: lvl = ( 50 )
+    // CHECK-NEXT: pos[0] : ( 0, 5
+    // CHECK-NEXT: crd[0] : ( 1, 9, 17, 27, 30
+    // CHECK-NEXT: values : ( 84, 34, 8, 40, 93
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %sv : tensor<?xf64, #SparseVector>
 
     // Release the resources.
     bufferization.dealloc_tensor %sv : tensor<?xf64, #SparseVector>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
index 70a63ae7bc9270..9606a792959954 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -160,7 +160,7 @@ module {
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     %c0 = arith.constant 0 : index
     %du = arith.constant -1 : i64
     %df = arith.constant -1.0 : f32
@@ -208,63 +208,94 @@ module {
     //
     // Verify result.
     //
-    // CHECK:      2
-    // CHECK-NEXT: 8
-    // CHECK-NEXT: 8
-    // CHECK-NEXT: 8
-    // CHECK-NEXT: 2
-    // CHECK-NEXT: 12
-    // CHECK-NEXT: 12
-    // CHECK-NEXT: 12
-    // CHECK-NEXT: ( 20, 80 )
-    // CHECK-NEXT: ( 0, 1, 12, 3, 24, 5, 6, 7 )
-    // CHECK-NEXT: ( 0, 2, 8, 24, 64, 160, 384, 896 )
-    // CHECK-NEXT: ( 1, 3, 6, 11, 20, 37, 70, 135 )
-    // CHECK-NEXT: ( 10, 120 )
-    // CHECK-NEXT: ( 0, 1, 2, 3, 1, 12, 3, 4, 2, 3, 4, 25 )
-    // CHECK-NEXT: ( 0, 0, 0, 0, 0, 2, 2, 3, 0, 2, 12, 24 )
-    // CHECK-NEXT: ( 1, 2, 3, 4, 2, 4, 4, 5, 3, 4, 7, 9 )
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 2
+    // CHECK-NEXT: dim = ( 8 )
+    // CHECK-NEXT: lvl = ( 8 )
+    // CHECK-NEXT: pos[0] : ( 0, 2
+    // CHECK-NEXT: crd[0] : ( 2, 4
+    // CHECK-NEXT: values : ( 20, 80
+    // CHECK-NEXT: ----
     //
-    %n0 = sparse_tensor.number_of_entries %0 : tensor<8xi64, #SparseVector>
-    %n1 = sparse_tensor.number_of_entries %1 : tensor<8xi64, #SparseVector>
-    %n2 = sparse_tensor.number_of_entries %2 : tensor<8xi64, #SparseVector>
-    %n3 = sparse_tensor.number_of_entries %3 : tensor<8xi64, #SparseVector>
-    %n4 = sparse_tensor.number_of_entries %4 : tensor<3x4xi64, #SparseMatrix>
-    %n5 = sparse_tensor.number_of_entries %5 : tensor<3x4xi64, #SparseMatrix>
-    %n6 = sparse_tensor.number_of_entries %6 : tensor<3x4xi64, #SparseMatrix>
-    %n7 = sparse_tensor.number_of_entries %7 : tensor<3x4xi64, #SparseMatrix>
-    %8 = sparse_tensor.values %0 : tensor<8xi64, #SparseVector> to memref<?xi64>
-    %9 = sparse_tensor.values %1 : tensor<8xi64, #SparseVector> to memref<?xi64>
-    %10 = sparse_tensor.values %2 : tensor<8xi64, #SparseVector> to memref<?xi64>
-    %11 = sparse_tensor.values %3 : tensor<8xi64, #SparseVector> to memref<?xi64>
-    %12 = sparse_tensor.values %4 : tensor<3x4xi64, #SparseMatrix> to memref<?xi64>
-    %13 = sparse_tensor.values %5 : tensor<3x4xi64, #SparseMatrix> to memref<?xi64>
-    %14 = sparse_tensor.values %6 : tensor<3x4xi64, #SparseMatrix> to memref<?xi64>
-    %15 = sparse_tensor.values %7 : tensor<3x4xi64, #SparseMatrix> to memref<?xi64>
-    %16 = vector.transfer_read %8[%c0], %du: memref<?xi64>, vector<2xi64>
-    %17 = vector.transfer_read %9[%c0], %du: memref<?xi64>, vector<8xi64>
-    %18 = vector.transfer_read %10[%c0], %du: memref<?xi64>, vector<8xi64>
-    %19 = vector.transfer_read %11[%c0], %du: memref<?xi64>, vector<8xi64>
-    %20 = vector.transfer_read %12[%c0], %du: memref<?xi64>, vector<2xi64>
-    %21 = vector.transfer_read %13[%c0], %du: memref<?xi64>, vector<12xi64>
-    %22 = vector.transfer_read %14[%c0], %du: memref<?xi64>, vector<12xi64>
-    %23 = vector.transfer_read %15[%c0], %du: memref<?xi64>, vector<12xi64>
-    vector.print %n0 : index
-    vector.print %n1 : index
-    vector.print %n2 : index
-    vector.print %n3 : index
-    vector.print %n4 : index
-    vector.print %n5 : index
-    vector.print %n6 : index
-    vector.print %n7 : index
-    vector.print %16 : vector<2xi64>
-    vector.print %17 : vector<8xi64>
-    vector.print %18 : vector<8xi64>
-    vector.print %19 : vector<8xi64>
-    vector.print %20 : vector<2xi64>
-    vector.print %21 : vector<12xi64>
-    vector.print %22 : vector<12xi64>
-    vector.print %23 : vector<12xi64>
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 8
+    // CHECK-NEXT: dim = ( 8 )
+    // CHECK-NEXT: lvl = ( 8 )
+    // CHECK-NEXT: pos[0] : ( 0, 8
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5, 6, 7
+    // CHECK-NEXT: values : ( 0, 1, 12, 3, 24, 5, 6, 7
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 8
+    // CHECK-NEXT: dim = ( 8 )
+    // CHECK-NEXT: lvl = ( 8 )
+    // CHECK-NEXT: pos[0] : ( 0, 8
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5, 6, 7
+    // CHECK-NEXT: values : ( 0, 2, 8, 24, 64, 160, 384, 896
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 8
+    // CHECK-NEXT: dim = ( 8 )
+    // CHECK-NEXT: lvl = ( 8 )
+    // CHECK-NEXT: pos[0] : ( 0, 8
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2, 3, 4, 5, 6, 7
+    // CHECK-NEXT: values : ( 1, 3, 6, 11, 20, 37, 70, 135
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 2
+    // CHECK-NEXT: dim = ( 3, 4 )
+    // CHECK-NEXT: lvl = ( 3, 4 )
+    // CHECK-NEXT: pos[0] : ( 0, 2
+    // CHECK-NEXT: crd[0] : ( 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 1, 2
+    // CHECK-NEXT: crd[1] : ( 1, 3
+    // CHECK-NEXT: values : ( 10, 120
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 12
+    // CHECK-NEXT: dim = ( 3, 4 )
+    // CHECK-NEXT: lvl = ( 3, 4 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: values : ( 0, 1, 2, 3, 1, 12, 3, 4, 2, 3, 4, 25
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 12
+    // CHECK-NEXT: dim = ( 3, 4 )
+    // CHECK-NEXT: lvl = ( 3, 4 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: values : ( 0, 0, 0, 0, 0, 2, 2, 3, 0, 2, 12, 24
+    // CHECK-NEXT: ----
+    //
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 12
+    // CHECK-NEXT: dim = ( 3, 4 )
+    // CHECK-NEXT: lvl = ( 3, 4 )
+    // CHECK-NEXT: pos[0] : ( 0, 3
+    // CHECK-NEXT: crd[0] : ( 0, 1, 2
+    // CHECK-NEXT: pos[1] : ( 0, 4, 8, 12
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+    // CHECK-NEXT: values : ( 1, 2, 3, 4, 2, 4, 4, 5, 3, 4, 7, 9
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %0 : tensor<8xi64, #SparseVector>
+    sparse_tensor.print %1 : tensor<8xi64, #SparseVector>
+    sparse_tensor.print %2 : tensor<8xi64, #SparseVector>
+    sparse_tensor.print %3 : tensor<8xi64, #SparseVector>
+    sparse_tensor.print %4 : tensor<3x4xi64, #SparseMatrix>
+    sparse_tensor.print %5 : tensor<3x4xi64, #SparseMatrix>
+    sparse_tensor.print %6 : tensor<3x4xi64, #SparseMatrix>
+    sparse_tensor.print %7 : tensor<3x4xi64, #SparseMatrix>
 
     // Release resources.
     bufferization.dealloc_tensor %sv : tensor<8xi64, #SparseVector>
@@ -283,13 +314,21 @@ module {
     //
     // Call the f32 kernel, verify the result, release the resources.
     //
-    // CHECK-NEXT: ( 0, 10, 0, 1, 1, 42 )
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 6
+    // CHECK-NEXT: dim = ( 2, 3 )
+    // CHECK-NEXT: lvl = ( 2, 3 )
+    // CHECK-NEXT: pos[0] : ( 0, 2
+    // CHECK-NEXT: crd[0] : ( 0, 1
+    // CHECK-NEXT: pos[1] : ( 0, 3, 6
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 0, 1, 2
+    // CHECK-NEXT: values : ( 0, 10, 0, 1, 1, 42
+    // CHECK-NEXT: ----
     //
     %100 = call @add_outer_2d(%sf32) : (tensor<2x3xf32, #SparseMatrix>)
       -> tensor<2x3xf32, #SparseMatrix>
-    %101 = sparse_tensor.values %100 : tensor<2x3xf32, #SparseMatrix> to memref<?xf32>
-    %102 = vector.transfer_read %101[%c0], %df: memref<?xf32>, vector<6xf32>
-    vector.print %102 : vector<6xf32>
+    sparse_tensor.print %100 : tensor<2x3xf32, #SparseMatrix>
+
     bufferization.dealloc_tensor %sf32 : tensor<2x3xf32, #SparseMatrix>
     bufferization.dealloc_tensor %100 : tensor<2x3xf32, #SparseMatrix>
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
index 4bb1f3d12871f3..fc7b82fdecea34 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -138,7 +138,7 @@ module {
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     %c0 = arith.constant 0 : index
     %du = arith.constant -1 : i64
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_3d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_3d.mlir
index 364a188cf37c4c..db6612402357e5 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_3d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_insert_3d.mlir
@@ -10,7 +10,7 @@
 // DEFINE: %{compile} = mlir-opt %s --sparsifier="%{sparsifier_opts}"
 // DEFINE: %{compile_sve} = mlir-opt %s --sparsifier="%{sparsifier_opts_sve}"
 // DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
-// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run_opts} = -e main -entry-point-result=void
 // DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
 // DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
 //
@@ -48,7 +48,7 @@ module {
   //
   // Main driver.
   //
-  func.func @entry() {
+  func.func @main() {
     %c0 = arith.constant 0 : index
     %c1 = arith.constant 1 : index
     %c2 = arith.constant 2 : index

>From 6f9a028a0924e782a606567024883a95340475c8 Mon Sep 17 00:00:00 2001
From: Yinying Li <yinyingli at google.com>
Date: Wed, 13 Mar 2024 00:24:16 +0000
Subject: [PATCH 10/10] address comments

---
 .../SparseTensor/CPU/sparse_coo_test.mlir     | 21 +++++++++-----
 .../SparseTensor/CPU/sparse_index.mlir        | 29 +++++++++----------
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
index ba3636b83c8be3..16813e0aa707b8 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
@@ -172,18 +172,19 @@ module {
     %COO_RET = call @add_coo_coo_out_coo(%COO_A, %COO_B) : (tensor<8x8xf32, #SortedCOO>,
                                                             tensor<8x8xf32, #SortedCOOSoA>)
                                                          -> tensor<8x8xf32, #SortedCOOSoA>
+    %C4 = sparse_tensor.convert %COO_RET : tensor<8x8xf32, #SortedCOOSoA> to tensor<8x8xf32>
 
     //
     // Verify computed matrix C.
     //
-    // CHECK-COUNT-3:      ( 8.8, 4.8, 6.8, 4.8, 8.8, 6.1, 14.8, 16.8 )
-    // CHECK-NEXT-COUNT-3: ( 4.4, 4.4, 4.4, 8.4, 8.4, 12.4, 16.4, 16.4 )
-    // CHECK-NEXT-COUNT-3: ( 8.8, 4.8, 6.8, 8.8, 8.8, 12.8, 14.8, 15.8 )
-    // CHECK-NEXT-COUNT-3: ( 4.3, 5.3, 6.3, 8.3, 8.3, 12.3, 14.3, 16.3 )
-    // CHECK-NEXT-COUNT-3: ( 4.5, 4.5, 6.5, 8.5, 8.5, 12.5, 14.5, 16.5 )
-    // CHECK-NEXT-COUNT-3: ( 9.9, 4.9, 6.9, 8.9, 8.9, 12.9, 15.9, 16.9 )
-    // CHECK-NEXT-COUNT-3: ( 12.1, 6.1, 5.1, 9.1, 9.1, 13.1, 15.1, 17.1 )
-    // CHECK-NEXT-COUNT-3: ( 15.4, 5.4, 7.4, 5.4, 11.4, 10.4, 11.4, 9.4 )
+    // CHECK-COUNT-4:      ( 8.8, 4.8, 6.8, 4.8, 8.8, 6.1, 14.8, 16.8 )
+    // CHECK-NEXT-COUNT-4: ( 4.4, 4.4, 4.4, 8.4, 8.4, 12.4, 16.4, 16.4 )
+    // CHECK-NEXT-COUNT-4: ( 8.8, 4.8, 6.8, 8.8, 8.8, 12.8, 14.8, 15.8 )
+    // CHECK-NEXT-COUNT-4: ( 4.3, 5.3, 6.3, 8.3, 8.3, 12.3, 14.3, 16.3 )
+    // CHECK-NEXT-COUNT-4: ( 4.5, 4.5, 6.5, 8.5, 8.5, 12.5, 14.5, 16.5 )
+    // CHECK-NEXT-COUNT-4: ( 9.9, 4.9, 6.9, 8.9, 8.9, 12.9, 15.9, 16.9 )
+    // CHECK-NEXT-COUNT-4: ( 12.1, 6.1, 5.1, 9.1, 9.1, 13.1, 15.1, 17.1 )
+    // CHECK-NEXT-COUNT-4: ( 15.4, 5.4, 7.4, 5.4, 11.4, 10.4, 11.4, 9.4 )
     //
     %f0  = arith.constant 0.0 : f32
     scf.for %i = %c0 to %c8 step %c1 {
@@ -193,9 +194,12 @@ module {
         : tensor<8x8xf32>, vector<8xf32>
       %v3 = vector.transfer_read %C3[%i, %c0], %f0
         : tensor<8x8xf32>, vector<8xf32>
+      %v4 = vector.transfer_read %C4[%i, %c0], %f0
+        : tensor<8x8xf32>, vector<8xf32>
       vector.print %v1 : vector<8xf32>
       vector.print %v2 : vector<8xf32>
       vector.print %v3 : vector<8xf32>
+      vector.print %v4 : vector<8xf32>
     }
 
     //
@@ -228,6 +232,7 @@ module {
     bufferization.dealloc_tensor %C1 : tensor<8x8xf32>
     bufferization.dealloc_tensor %C2 : tensor<8x8xf32>
     bufferization.dealloc_tensor %C3 : tensor<8x8xf32>
+    bufferization.dealloc_tensor %C4 : tensor<8x8xf32>
     bufferization.dealloc_tensor %CSR_A : tensor<8x8xf32, #CSR>
     bufferization.dealloc_tensor %COO_A : tensor<8x8xf32, #SortedCOO>
     bufferization.dealloc_tensor %COO_B : tensor<8x8xf32, #SortedCOOSoA>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
index 9606a792959954..3ce45e5fd971f2 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
@@ -297,22 +297,8 @@ module {
     sparse_tensor.print %6 : tensor<3x4xi64, #SparseMatrix>
     sparse_tensor.print %7 : tensor<3x4xi64, #SparseMatrix>
 
-    // Release resources.
-    bufferization.dealloc_tensor %sv : tensor<8xi64, #SparseVector>
-    bufferization.dealloc_tensor %dv : tensor<8xi64, #SparseVector>
-    bufferization.dealloc_tensor %0 : tensor<8xi64, #SparseVector>
-    bufferization.dealloc_tensor %1 : tensor<8xi64, #SparseVector>
-    bufferization.dealloc_tensor %2 : tensor<8xi64, #SparseVector>
-    bufferization.dealloc_tensor %3 : tensor<8xi64, #SparseVector>
-    bufferization.dealloc_tensor %sm : tensor<3x4xi64, #SparseMatrix>
-    bufferization.dealloc_tensor %dm : tensor<3x4xi64, #SparseMatrix>
-    bufferization.dealloc_tensor %4 : tensor<3x4xi64, #SparseMatrix>
-    bufferization.dealloc_tensor %5 : tensor<3x4xi64, #SparseMatrix>
-    bufferization.dealloc_tensor %6 : tensor<3x4xi64, #SparseMatrix>
-    bufferization.dealloc_tensor %7 : tensor<3x4xi64, #SparseMatrix>
-
     //
-    // Call the f32 kernel, verify the result, release the resources.
+    // Call the f32 kernel, verify the result.
     //
     // CHECK:      ---- Sparse Tensor ----
     // CHECK-NEXT: nse = 6
@@ -329,6 +315,19 @@ module {
       -> tensor<2x3xf32, #SparseMatrix>
     sparse_tensor.print %100 : tensor<2x3xf32, #SparseMatrix>
 
+    // Release resources.
+    bufferization.dealloc_tensor %sv : tensor<8xi64, #SparseVector>
+    bufferization.dealloc_tensor %dv : tensor<8xi64, #SparseVector>
+    bufferization.dealloc_tensor %0 : tensor<8xi64, #SparseVector>
+    bufferization.dealloc_tensor %1 : tensor<8xi64, #SparseVector>
+    bufferization.dealloc_tensor %2 : tensor<8xi64, #SparseVector>
+    bufferization.dealloc_tensor %3 : tensor<8xi64, #SparseVector>
+    bufferization.dealloc_tensor %sm : tensor<3x4xi64, #SparseMatrix>
+    bufferization.dealloc_tensor %dm : tensor<3x4xi64, #SparseMatrix>
+    bufferization.dealloc_tensor %4 : tensor<3x4xi64, #SparseMatrix>
+    bufferization.dealloc_tensor %5 : tensor<3x4xi64, #SparseMatrix>
+    bufferization.dealloc_tensor %6 : tensor<3x4xi64, #SparseMatrix>
+    bufferization.dealloc_tensor %7 : tensor<3x4xi64, #SparseMatrix>
     bufferization.dealloc_tensor %sf32 : tensor<2x3xf32, #SparseMatrix>
     bufferization.dealloc_tensor %100 : tensor<2x3xf32, #SparseMatrix>