[Mlir-commits] [mlir] 27ea470 - [mlir][sparse] Add runtime support for reading a COO tensor and writing the data to the given indices and values buffers.
llvmlistbot at llvm.org
Tue Feb 28 08:28:19 PST 2023
Author: bixia1
Date: 2023-02-28T08:28:13-08:00
New Revision: 27ea470f22abc73792a380d4f38c3917800fe02a
URL: https://github.com/llvm/llvm-project/commit/27ea470f22abc73792a380d4f38c3917800fe02a
DIFF: https://github.com/llvm/llvm-project/commit/27ea470f22abc73792a380d4f38c3917800fe02a.diff
LOG: [mlir][sparse] Add runtime support for reading a COO tensor and writing the data to the given indices and values buffers.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143862
Added:
Modified:
mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index 9ed209517a111..165b3af74ab4e 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -92,6 +92,11 @@ enum class PrimaryType : uint32_t {
};
// This x-macro includes all `V` types.
+// TODO: We currently split out the non-variadic version from the variadic
+// version. Using `##__VA_ARGS__` to avoid the split triggers
+// "warning: token pasting of ',' and __VA_ARGS__ is a GNU extension
+// [-Wgnu-zero-variadic-macro-arguments]",
+// and the alternative `__VA_OPT__(, ) __VA_ARGS__` requires C++20.
#define MLIR_SPARSETENSOR_FOREVERY_V(DO) \
DO(F64, double) \
DO(F32, float) \
@@ -104,6 +109,27 @@ enum class PrimaryType : uint32_t {
DO(C64, complex64) \
DO(C32, complex32)
+// This x-macro includes all `V` types and supports variadic arguments.
+#define MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, ...) \
+ DO(F64, double, __VA_ARGS__) \
+ DO(F32, float, __VA_ARGS__) \
+ DO(F16, f16, __VA_ARGS__) \
+ DO(BF16, bf16, __VA_ARGS__) \
+ DO(I64, int64_t, __VA_ARGS__) \
+ DO(I32, int32_t, __VA_ARGS__) \
+ DO(I16, int16_t, __VA_ARGS__) \
+ DO(I8, int8_t, __VA_ARGS__) \
+ DO(C64, complex64, __VA_ARGS__) \
+ DO(C32, complex32, __VA_ARGS__)
+
+// This x-macro calls its argument on every pair of overhead and `V` types.
+#define MLIR_SPARSETENSOR_FOREVERY_V_O(DO) \
+ MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, 64, uint64_t) \
+ MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, 32, uint32_t) \
+ MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, 16, uint16_t) \
+ MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, 8, uint8_t) \
+ MLIR_SPARSETENSOR_FOREVERY_V_VAR(DO, 0, index_type)
+
constexpr bool isFloatingPrimaryType(PrimaryType valTy) {
return PrimaryType::kF64 <= valTy && valTy <= PrimaryType::kBF16;
}
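
For illustration, a minimal sketch (not part of the patch; `PRINT_PAIR` is a hypothetical callback) of how the new x-macro drives code generation. Each `DO` invocation receives `(VNAME, V, CNAME, C)`: the value-type name and C++ type, then the overhead (coordinate) width name and C++ type.

    // Hypothetical illustration, assuming Enums.h is on the include path;
    // the type tokens are only stringified, so no further definitions are
    // needed.
    #include "mlir/Dialect/SparseTensor/IR/Enums.h"
    #include <cstdio>

    #define PRINT_PAIR(VNAME, V, CNAME, C)                                   \
      std::printf("Read%s%s: coords=%s, values=%s\n", #CNAME, #VNAME, #C, #V);

    int main() {
      // Expands to 50 printf calls (10 value types x 5 overhead widths),
      // e.g. "Read64F64: coords=uint64_t, values=double".
      MLIR_SPARSETENSOR_FOREVERY_V_O(PRINT_PAIR)
      return 0;
    }
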
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
index b24bfd2c161b0..d8382de72630e 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -249,6 +249,14 @@ class SparseTensorReader final {
return tensor;
}
+ /// Reads the COO tensor from the file, stores the coordinates and values in
+ /// the given buffers, and returns whether the COO elements appear in sorted
+ /// order.
+ /// Precondition: the buffers must be large enough to hold all elements.
+ template <typename C, typename V>
+ bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl,
+ C *lvlCoordinates, V *values);
+
private:
/// Attempts to read a line from the file. Is private because there's
/// no reason for client code to call it.
@@ -287,6 +295,13 @@ class SparseTensorReader final {
void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
SparseTensorCOO<V> *lvlCOO);
+ /// The internal implementation of `readToBuffers`. We template over
+ /// `IsPattern` in order to perform LICM without needing to duplicate the
+ /// source code.
+ template <typename C, typename V, bool IsPattern>
+ bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
+ C *lvlCoordinates, V *values);
+
/// Reads the MME header of a general sparse matrix of type real.
void readMMEHeader();
@@ -351,6 +366,69 @@ void SparseTensorReader::readCOOLoop(uint64_t lvlRank,
}
}
+template <typename C, typename V>
+bool SparseTensorReader::readToBuffers(uint64_t lvlRank,
+ const uint64_t *dim2lvl,
+ C *lvlCoordinates, V *values) {
+ assert(isValid() && "Attempt to readToBuffers() before readHeader()");
+ const uint64_t dimRank = getRank();
+ assert(lvlRank == dimRank && "Rank mismatch");
+ detail::PermutationRef d2l(dimRank, dim2lvl);
+ // Do some manual LICM, to avoid assertions in the for-loop.
+ bool isSorted =
+ isPattern()
+ ? readToBuffersLoop<C, V, true>(lvlRank, d2l, lvlCoordinates, values)
+ : readToBuffersLoop<C, V, false>(lvlRank, d2l, lvlCoordinates,
+ values);
+
+ // Close the file and return isSorted.
+ closeFile();
+ return isSorted;
+}
+
+template <typename C, typename V, bool IsPattern>
+bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank,
+ detail::PermutationRef dim2lvl,
+ C *lvlCoordinates, V *values) {
+ const uint64_t dimRank = getRank();
+ const uint64_t nse = getNNZ();
+ std::vector<C> dimCoords(dimRank);
+ // Read the first element with isSorted=false, so that the check does not
+ // access the (nonexistent) element before the first one.
+ bool isSorted = false;
+ char *linePtr;
+ // We inline `readCOOElement` here in order to avoid redundant assertions,
+ // since they're guaranteed by the call to `isValid()` and the construction
+ // of `dimCoords` above.
+ auto readElement = [&]() {
+ linePtr = readCOOIndices<C>(dimCoords.data());
+ dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates);
+ *values = detail::readCOOValue<V, IsPattern>(&linePtr);
+ if (isSorted) {
+ // Note that isSorted was set to false while reading the first element,
+ // which guarantees that accessing prevLvlCoords here is safe.
+ C *prevLvlCoords = lvlCoordinates - lvlRank;
+ // TODO: define a new CoordsLT which is like ElementLT but doesn't have
+ // the V parameter, and use it here.
+ for (uint64_t l = 0; l < lvlRank; ++l) {
+ if (prevLvlCoords[l] != lvlCoordinates[l]) {
+ if (prevLvlCoords[l] > lvlCoordinates[l])
+ isSorted = false;
+ break;
+ }
+ }
+ }
+ lvlCoordinates += lvlRank;
+ ++values;
+ };
+ readElement();
+ isSorted = true;
+ for (uint64_t n = 1; n < nse; ++n)
+ readElement();
+
+ return isSorted;
+}
+
/// Writes the sparse tensor to `filename` in extended FROSTT format.
template <typename V>
inline void writeExtFROSTT(const SparseTensorCOO<V> &coo,
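
As a usage sketch of the new entry point (hypothetical driver code; it assumes the reader is constructed, opened, and its header parsed the same way the runtime's `createSparseTensorReader` wrapper does, and it uses an identity dim-to-level mapping):

    #include "mlir/ExecutionEngine/SparseTensor/File.h"
    #include <cstdint>
    #include <numeric>
    #include <vector>

    using mlir::sparse_tensor::SparseTensorReader;

    bool readAllElements(const char *filename) {
      SparseTensorReader reader(filename);
      reader.openFile();
      reader.readHeader(); // must precede readToBuffers()
      const uint64_t rank = reader.getRank();
      const uint64_t nse = reader.getNNZ();
      // Identity dim2lvl mapping: level l reads dimension l.
      std::vector<uint64_t> dim2lvl(rank);
      std::iota(dim2lvl.begin(), dim2lvl.end(), 0);
      // Per the precondition: nse * rank coordinate slots, nse value slots.
      // Element i occupies coordinate slots [i * rank, (i + 1) * rank).
      std::vector<uint64_t> coords(nse * rank);
      std::vector<double> values(nse);
      // Fills both buffers, closes the file, and reports sortedness.
      return reader.readToBuffers<uint64_t, double>(rank, dim2lvl.data(),
                                                    coords.data(),
                                                    values.data());
    }
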
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
index 953cbe22804b5..0c5c8c2db5bcc 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
@@ -283,6 +283,17 @@ MLIR_CRUNNERUTILS_EXPORT void delSparseTensorReader(void *p);
MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETNEXT)
#undef DECL_GETNEXT
+/// Reads the sparse tensor, stores the coordinates and values in the given
+/// memrefs, and returns whether the COO elements are sorted.
+#define DECL_GETNEXT(VNAME, V, CNAME, C) \
+ MLIR_CRUNNERUTILS_EXPORT bool \
+ _mlir_ciface_getSparseTensorReaderRead##CNAME##VNAME( \
+ void *p, StridedMemRefType<index_type, 1> *dim2lvlRef, \
+ StridedMemRefType<C, 1> *iref, StridedMemRefType<V, 1> *vref);
+ MLIR_SPARSETENSOR_FOREVERY_V_O(DECL_GETNEXT)
+#undef DECL_GETNEXT
+
using SparseTensorWriter = std::ostream;
/// Creates a SparseTensorWriter for outputting a sparse tensor to a file with
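
Spelled out, the single declaration this x-macro emits for the `(0, index_type)` overhead and `F32` value pair (the symbol the updated integration test calls as `@getSparseTensorReaderRead0F32`) looks like this; an illustrative expansion, not additional source:

    MLIR_CRUNNERUTILS_EXPORT bool _mlir_ciface_getSparseTensorReaderRead0F32(
        void *p, StridedMemRefType<index_type, 1> *dim2lvlRef,
        StridedMemRefType<index_type, 1> *iref,
        StridedMemRefType<float, 1> *vref);
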
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index 3a0e5104f8406..31c5332184bfe 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -631,6 +631,33 @@ void _mlir_ciface_getSparseTensorReaderDimSizes(
MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETNEXT)
#undef IMPL_GETNEXT
+#define IMPL_GETNEXT(VNAME, V, CNAME, C) \
+ bool _mlir_ciface_getSparseTensorReaderRead##CNAME##VNAME( \
+ void *p, StridedMemRefType<index_type, 1> *dim2lvlRef, \
+ StridedMemRefType<C, 1> *cref, StridedMemRefType<V, 1> *vref) { \
+ assert(p); \
+ auto &reader = *static_cast<SparseTensorReader *>(p); \
+ ASSERT_NO_STRIDE(cref); \
+ ASSERT_NO_STRIDE(vref); \
+ ASSERT_NO_STRIDE(dim2lvlRef); \
+ const uint64_t cSize = MEMREF_GET_USIZE(cref); \
+ const uint64_t vSize = MEMREF_GET_USIZE(vref); \
+ const uint64_t lvlRank = reader.getRank(); \
+ assert(vSize * lvlRank <= cSize); \
+ assert(vSize >= reader.getNNZ() && "Not enough space in buffers"); \
+ ASSERT_USIZE_EQ(dim2lvlRef, lvlRank); \
+ (void)cSize; \
+ (void)vSize; \
+ (void)lvlRank; \
+ C *lvlCoordinates = MEMREF_GET_PAYLOAD(cref); \
+ V *values = MEMREF_GET_PAYLOAD(vref); \
+ index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef); \
+ return reader.readToBuffers<C, V>(lvlRank, dim2lvl, lvlCoordinates, \
+ values); \
+ }
+MLIR_SPARSETENSOR_FOREVERY_V_O(IMPL_GETNEXT)
+#undef IMPL_GETNEXT
+
void *_mlir_ciface_newSparseTensorFromReader(
void *p, StridedMemRefType<index_type, 1> *lvlSizesRef,
StridedMemRefType<DimLevelType, 1> *lvlTypesRef,
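
The `vSize * lvlRank <= cSize` assertion above encodes the interleaved coordinate layout: element i's lvlRank coordinates are stored contiguously at offset i * lvlRank. A small sketch of recovering one level's coordinates from that buffer (`coordsOfLevel` is a hypothetical helper; the updated test below achieves the same effect with a strided `memref.subview`):

    #include <cstdint>
    #include <vector>

    std::vector<uint64_t> coordsOfLevel(const uint64_t *lvlCoordinates,
                                        uint64_t nse, uint64_t lvlRank,
                                        uint64_t lvl) {
      std::vector<uint64_t> out(nse);
      for (uint64_t i = 0; i < nse; ++i)
        out[i] = lvlCoordinates[i * lvlRank + lvl]; // stride lvlRank, offset lvl
      return out;
    }
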
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir
index c60dedcc218d1..b6fea69d6d6b1 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_file_io.mlir
@@ -43,6 +43,9 @@ module {
func.func private @getSparseTensorReaderIsSymmetric(!TensorReader) -> (i1)
func.func private @copySparseTensorReaderDimSizes(!TensorReader,
memref<?xindex>) -> () attributes { llvm.emit_c_interface }
+ func.func private @getSparseTensorReaderRead0F32(!TensorReader,
+ memref<?xindex>, memref<?xindex>, memref<?xf32>)
+ -> (i1) attributes { llvm.emit_c_interface }
func.func private @getSparseTensorReaderNextF32(!TensorReader,
memref<?xindex>, memref<f32>) -> () attributes { llvm.emit_c_interface }
@@ -60,6 +63,14 @@ module {
return
}
+ func.func @dumpi2(%arg0: memref<?xindex, strided<[2], offset: ?>>) {
+ %c0 = arith.constant 0 : index
+ %v = vector.transfer_read %arg0[%c0], %c0 :
+ memref<?xindex, strided<[2], offset: ?>>, vector<17xindex>
+ vector.print %v : vector<17xindex>
+ return
+ }
+
func.func @dumpf(%arg0: memref<?xf32>) {
%c0 = arith.constant 0 : index
%d0 = arith.constant 0.0 : f32
@@ -70,39 +81,31 @@ module {
// Returns the coordinates and values of the tensor, and whether they are sorted.
func.func @readTensorFile(%tensor: !TensorReader)
- -> (memref<?xindex>, memref<?xindex>, memref<?xf32>) {
+ -> (memref<?xindex>, memref<?xf32>, i1) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
+ %c2 = arith.constant 2 : index
%rank = call @getSparseTensorReaderRank(%tensor) : (!TensorReader) -> index
%nnz = call @getSparseTensorReaderNNZ(%tensor) : (!TensorReader) -> index
// Assume rank == 2.
- %x0s = memref.alloc(%nnz) : memref<?xindex>
- %x1s = memref.alloc(%nnz) : memref<?xindex>
+ %isize = arith.muli %c2, %nnz : index
+ %xs = memref.alloc(%isize) : memref<?xindex>
%vs = memref.alloc(%nnz) : memref<?xf32>
- %indices = memref.alloc(%rank) : memref<?xindex>
- %value = memref.alloca() : memref<f32>
- scf.for %i = %c0 to %nnz step %c1 {
- func.call @getSparseTensorReaderNextF32(%tensor, %indices, %value)
- : (!TensorReader, memref<?xindex>, memref<f32>) -> ()
- // TODO: can we use memref.subview to avoid the need for the %value
- // buffer?
- %v = memref.load %value[] : memref<f32>
- memref.store %v, %vs[%i] : memref<?xf32>
- %i0 = memref.load %indices[%c0] : memref<?xindex>
- memref.store %i0, %x0s[%i] : memref<?xindex>
- %i1 = memref.load %indices[%c1] : memref<?xindex>
- memref.store %i1, %x1s[%i] : memref<?xindex>
- }
-
- // Release the resource for the indices.
- memref.dealloc %indices : memref<?xindex>
- return %x0s, %x1s, %vs : memref<?xindex>, memref<?xindex>, memref<?xf32>
+ %dim2lvl = memref.alloca(%c2) : memref<?xindex>
+ memref.store %c0, %dim2lvl[%c0] : memref<?xindex>
+ memref.store %c1, %dim2lvl[%c1] : memref<?xindex>
+ %isSorted = func.call @getSparseTensorReaderRead0F32(%tensor, %dim2lvl, %xs, %vs)
+ : (!TensorReader, memref<?xindex>, memref<?xindex>, memref<?xf32>) -> (i1)
+ return %xs, %vs, %isSorted : memref<?xindex>, memref<?xf32>, i1
}
// Reads a COO tensor from the given file name and prints its content.
func.func @readTensorFileAndDump(%fileName: !Filename) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c2 = arith.constant 2 : index
%tensor = call @createSparseTensorReader(%fileName)
: (!Filename) -> (!TensorReader)
%rank = call @getSparseTensorReaderRank(%tensor) : (!TensorReader) -> index
@@ -116,18 +119,22 @@ module {
func.call @copySparseTensorReaderDimSizes(%tensor, %dimSizes)
: (!TensorReader, memref<?xindex>) -> ()
call @dumpi(%dimSizes) : (memref<?xindex>) -> ()
- %x0s, %x1s, %vs = call @readTensorFile(%tensor)
- : (!TensorReader) -> (memref<?xindex>, memref<?xindex>, memref<?xf32>)
- call @dumpi(%x0s) : (memref<?xindex>) -> ()
- call @dumpi(%x1s) : (memref<?xindex>) -> ()
+ %xs, %vs, %isSorted = call @readTensorFile(%tensor)
+ : (!TensorReader) -> (memref<?xindex>, memref<?xf32>, i1)
+ %x0s = memref.subview %xs[%c0][%nnz][%c2]
+ : memref<?xindex> to memref<?xindex, strided<[2], offset: ?>>
+ %x1s = memref.subview %xs[%c1][%nnz][%c2]
+ : memref<?xindex> to memref<?xindex, strided<[2], offset: ?>>
+ vector.print %isSorted : i1
+ call @dumpi2(%x0s) : (memref<?xindex, strided<[2], offset: ?>>) -> ()
+ call @dumpi2(%x1s) : (memref<?xindex, strided<[2], offset: ?>>) -> ()
call @dumpf(%vs) : (memref<?xf32>) -> ()
// Release the resources.
call @delSparseTensorReader(%tensor) : (!TensorReader) -> ()
memref.dealloc %dimSizes : memref<?xindex>
- memref.dealloc %x0s : memref<?xindex>
- memref.dealloc %x1s : memref<?xindex>
+ memref.dealloc %xs : memref<?xindex>
memref.dealloc %vs : memref<?xf32>
return
@@ -184,6 +191,7 @@ module {
// CHECK: 17
// CHECK: 0
// CHECK: ( 4, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+ // CHECK: 1
// CHECK: ( 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 )
// CHECK: ( 0, 126, 127, 254, 1, 253, 2, 0, 1, 3, 98, 126, 127, 128, 249, 253, 255 )
// CHECK: ( -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17 )
@@ -215,4 +223,4 @@ module {
return
}
-}
+}
\ No newline at end of file