[Mlir-commits] [mlir] 128a9e1 - [mlir][sparse] cleanup ABI issues in C interface with memrefs

Tue Sep 21 21:58:21 PDT 2021

Author: Aart Bik
Date: 2021-09-21T21:58:12-07:00
New Revision: 128a9e1cb480188fc68aaedbcaf92d8ee74a92c7

URL: https://github.com/llvm/llvm-project/commit/128a9e1cb480188fc68aaedbcaf92d8ee74a92c7
DIFF: https://github.com/llvm/llvm-project/commit/128a9e1cb480188fc68aaedbcaf92d8ee74a92c7.diff

LOG: [mlir][sparse] cleanup ABI issues in C interface with memrefs

This change adds automatic wrapper functoins with emit_c_interface
to all methods in the sparse support library that deal with MEMREFs.
The wrappers will take care of passing MEMREFs by value internally
and by pointer externally, thereby avoiding ABI issues across platforms.

Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D110219

Added: 
    

Modified: 
    mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
    mlir/lib/ExecutionEngine/SparseUtils.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 14ff5679ff9a..c5997f6990f2 100644

--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -83,7 +83,7 @@ getDimLevelTypeEncoding(SparseTensorEncodingAttr::DimLevelType dlt) {
 
 /// Returns integers of given width and values as a constant tensor.
 /// We cast the static shape into a dynamic shape to ensure that the
-/// method signature remains uniform accross 
diff erent tensor dimensions.
+/// method signature remains uniform across 
diff erent tensor dimensions.
 static Value getTensor(ConversionPatternRewriter &rewriter, unsigned width,
                        Location loc, ArrayRef<APInt> values) {
   Type etp = rewriter.getIntegerType(width);
@@ -95,20 +95,25 @@ static Value getTensor(ConversionPatternRewriter &rewriter, unsigned width,
   return rewriter.create<tensor::CastOp>(loc, tt2, elts);
 }
 
-/// Returns function reference (first hit also inserts into module).
+/// Returns a function reference (first hit also inserts into module). Sets
+/// the "_emit_c_interface" on the function declaration when requested,
+/// so that LLVM lowering generates a wrapper function that takes care
+/// of ABI complications with passing in and returning MemRefs to C functions.
 static FlatSymbolRefAttr getFunc(Operation *op, StringRef name, Type resultType,
-                                 ValueRange operands) {
+                                 ValueRange operands,
+                                 bool emitCInterface = false) {
   MLIRContext *context = op->getContext();
   auto module = op->getParentOfType<ModuleOp>();
   auto result = SymbolRefAttr::get(context, name);
   auto func = module.lookupSymbol<FuncOp>(result.getAttr());
   if (!func) {
     OpBuilder moduleBuilder(module.getBodyRegion());
-    moduleBuilder
-        .create<FuncOp>(
-            op->getLoc(), name,
-            FunctionType::get(context, operands.getTypes(), resultType))
-        .setPrivate();
+    func = moduleBuilder.create<FuncOp>(
+        op->getLoc(), name,
+        FunctionType::get(context, operands.getTypes(), resultType));
+    func.setPrivate();
+    if (emitCInterface)
+      func->setAttr("llvm.emit_c_interface", UnitAttr::get(context));
   }
   return result;
 }
@@ -171,8 +176,9 @@ static Value genNewCall(ConversionPatternRewriter &rewriter, Operation *op,
   params.push_back(ptr);
   // Generate the call to create new tensor.
   StringRef name = "newSparseTensor";
-  auto call =
-      rewriter.create<CallOp>(loc, pTp, getFunc(op, name, pTp, params), params);
+  auto call = rewriter.create<CallOp>(
+      loc, pTp, getFunc(op, name, pTp, params, /*emitCInterface=*/true),
+      params);
   return call.getResult(0);
 }
 
@@ -210,7 +216,9 @@ static void genAddEltCall(ConversionPatternRewriter &rewriter, Operation *op,
   params.push_back(ind);
   params.push_back(perm);
   Type pTp = LLVM::LLVMPointerType::get(IntegerType::get(op->getContext(), 8));
-  rewriter.create<CallOp>(loc, pTp, getFunc(op, name, pTp, params), params);
+  rewriter.create<CallOp>(
+      loc, pTp, getFunc(op, name, pTp, params, /*emitCInterface=*/true),
+      params);
 }
 
 //===----------------------------------------------------------------------===//
@@ -369,7 +377,9 @@ class SparseTensorToPointersConverter
     else
       return failure();
     rewriter.replaceOpWithNewOp<CallOp>(
-        op, resType, getFunc(op, name, resType, operands), operands);
+        op, resType,
+        getFunc(op, name, resType, operands, /*emitCInterface=*/true),
+        operands);
     return success();
   }
 };
@@ -397,7 +407,9 @@ class SparseTensorToIndicesConverter : public OpConversionPattern<ToIndicesOp> {
     else
       return failure();
     rewriter.replaceOpWithNewOp<CallOp>(
-        op, resType, getFunc(op, name, resType, operands), operands);
+        op, resType,
+        getFunc(op, name, resType, operands, /*emitCInterface=*/true),
+        operands);
     return success();
   }
 };
@@ -427,7 +439,9 @@ class SparseTensorToValuesConverter : public OpConversionPattern<ToValuesOp> {
     else
       return failure();
     rewriter.replaceOpWithNewOp<CallOp>(
-        op, resType, getFunc(op, name, resType, operands), operands);
+        op, resType,
+        getFunc(op, name, resType, operands, /*emitCInterface=*/true),
+        operands);
     return success();
   }
 };

diff  --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseUtils.cpp
index 3497fe5b49b3..7168946c28c0 100644
--- a/mlir/lib/ExecutionEngine/SparseUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseUtils.cpp
@@ -111,8 +111,9 @@ struct SparseTensorCOO {
   /// the given ordering and expects subsequent add() calls to honor
   /// that same ordering for the given indices. The result is a
   /// fully permuted coordinate scheme.
-  static SparseTensorCOO<V> *newSparseTensorCOO(uint64_t size, uint64_t *sizes,
-                                                uint64_t *perm,
+  static SparseTensorCOO<V> *newSparseTensorCOO(uint64_t size,
+                                                const uint64_t *sizes,
+                                                const uint64_t *perm,
                                                 uint64_t capacity = 0) {
     std::vector<uint64_t> permsz(size);
     for (uint64_t r = 0; r < size; r++)
@@ -182,8 +183,8 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   /// Constructs a sparse tensor storage scheme from the given sparse
   /// tensor in coordinate scheme following the given per-rank dimension
   /// dense/sparse annotations.
-  SparseTensorStorage(SparseTensorCOO<V> *tensor, uint8_t *sparsity,
-                      uint64_t *perm)
+  SparseTensorStorage(SparseTensorCOO<V> *tensor, const uint8_t *sparsity,
+                      const uint64_t *perm)
       : sizes(tensor->getSizes()), rev(getRank()), pointers(getRank()),
         indices(getRank()) {
     // Store "reverse" permutation.
@@ -235,7 +236,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
 
   /// Returns this sparse tensor storage scheme as a new memory-resident
   /// sparse tensor in coordinate scheme with the given dimension order.
-  SparseTensorCOO<V> *toCOO(uint64_t *perm) {
+  SparseTensorCOO<V> *toCOO(const uint64_t *perm) {
     // Restore original order of the dimension sizes and allocate coordinate
     // scheme with desired new ordering specified in perm.
     uint64_t size = getRank();
@@ -258,8 +259,9 @@ class SparseTensorStorage : public SparseTensorStorageBase {
 
   /// Factory method. Expects a coordinate scheme that respects the same
   /// permutation as is desired for the new sparse storage scheme.
-  static SparseTensorStorage<P, I, V> *
-  newSparseTensor(SparseTensorCOO<V> *t, uint8_t *sparsity, uint64_t *perm) {
+  static SparseTensorStorage<P, I, V> *newSparseTensor(SparseTensorCOO<V> *t,
+                                                       const uint8_t *sparsity,
+                                                       const uint64_t *perm) {
     t->sort(); // sort lexicographically
     SparseTensorStorage<P, I, V> *n =
         new SparseTensorStorage<P, I, V>(t, sparsity, perm);
@@ -271,7 +273,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   /// Initializes sparse tensor storage scheme from a memory-resident sparse
   /// tensor in coordinate scheme. This method prepares the pointers and indices
   /// arrays under the given per-rank dimension dense/sparse annotations.
-  void fromCOO(SparseTensorCOO<V> *tensor, uint8_t *sparsity, uint64_t lo,
+  void fromCOO(SparseTensorCOO<V> *tensor, const uint8_t *sparsity, uint64_t lo,
                uint64_t hi, uint64_t d) {
     const std::vector<Element<V>> &elements = tensor->getElements();
     // Once dimensions are exhausted, insert the numerical values.
@@ -428,8 +430,8 @@ static void readExtFROSTTHeader(FILE *file, char *name, uint64_t *idata) {
 /// sparse tensor in coordinate scheme.
 template <typename V>
 static SparseTensorCOO<V> *openSparseTensorCOO(char *filename, uint64_t size,
-                                               uint64_t *sizes,
-                                               uint64_t *perm) {
+                                               const uint64_t *sizes,
+                                               const uint64_t *perm) {
   // Open the file.
   FILE *file = fopen(filename, "r");
   if (!file) {
@@ -515,6 +517,17 @@ char *getTensorFilename(uint64_t id) {
     uint64_t strides[1];                                                       \
   }
 
+TEMPLATE(MemRef1DU64, uint64_t);
+TEMPLATE(MemRef1DU32, uint32_t);
+TEMPLATE(MemRef1DU16, uint16_t);
+TEMPLATE(MemRef1DU8, uint8_t);
+TEMPLATE(MemRef1DI64, int64_t);
+TEMPLATE(MemRef1DI32, int32_t);
+TEMPLATE(MemRef1DI16, int16_t);
+TEMPLATE(MemRef1DI8, int8_t);
+TEMPLATE(MemRef1DF64, double);
+TEMPLATE(MemRef1DF32, float);
+
 #define CASE(p, i, v, P, I, V)                                                 \
   if (ptrTp == (p) && indTp == (i) && valTp == (v)) {                          \
     SparseTensorCOO<V> *tensor = nullptr;                                      \
@@ -531,30 +544,36 @@ char *getTensorFilename(uint64_t id) {
                                                          perm);                \
   }
 
-#define IMPL1(RET, NAME, TYPE, LIB)                                            \
-  RET NAME(void *tensor) {                                                     \
+#define IMPL1(REF, NAME, TYPE, LIB)                                            \
+  void _mlir_ciface_##NAME(REF *ref, void *tensor) {                           \
     std::vector<TYPE> *v;                                                      \
     static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v);                   \
-    return {v->data(), v->data(), 0, {v->size()}, {1}};                        \
+    ref->base = ref->data = v->data();                                         \
+    ref->off = 0;                                                              \
+    ref->sizes[0] = v->size();                                                 \
+    ref->strides[0] = 1;                                                       \
   }
 
-#define IMPL2(RET, NAME, TYPE, LIB)                                            \
-  RET NAME(void *tensor, uint64_t d) {                                         \
+#define IMPL2(REF, NAME, TYPE, LIB)                                            \
+  void _mlir_ciface_##NAME(REF *ref, void *tensor, uint64_t d) {               \
     std::vector<TYPE> *v;                                                      \
     static_cast<SparseTensorStorageBase *>(tensor)->LIB(&v, d);                \
-    return {v->data(), v->data(), 0, {v->size()}, {1}};                        \
+    ref->base = ref->data = v->data();                                         \
+    ref->off = 0;                                                              \
+    ref->sizes[0] = v->size();                                                 \
+    ref->strides[0] = 1;                                                       \
   }
 
 #define IMPL3(NAME, TYPE)                                                      \
-  void *NAME(void *tensor, TYPE value, uint64_t *ibase, uint64_t *idata,       \
-             uint64_t ioff, uint64_t isize, uint64_t istride, uint64_t *pbase, \
-             uint64_t *pdata, uint64_t poff, uint64_t psize,                   \
-             uint64_t pstride) {                                               \
-    assert(istride == 1 && pstride == 1 && isize == psize);                    \
-    uint64_t *indx = idata + ioff;                                             \
+  void *_mlir_ciface_##NAME(void *tensor, TYPE value, MemRef1DU64 *iref,       \
+                            MemRef1DU64 *pref) {                               \
     if (!value)                                                                \
       return tensor;                                                           \
-    uint64_t *perm = pdata + poff;                                             \
+    assert(iref->strides[0] == 1 && pref->strides[0] == 1);                    \
+    assert(iref->sizes[0] == pref->sizes[0]);                                  \
+    const uint64_t *indx = iref->data + iref->off;                             \
+    const uint64_t *perm = pref->data + pref->off;                             \
+    uint64_t isize = iref->sizes[0];                                           \
     std::vector<uint64_t> indices(isize);                                      \
     for (uint64_t r = 0; r < isize; r++)                                       \
       indices[perm[r]] = indx[r];                                              \
@@ -562,17 +581,6 @@ char *getTensorFilename(uint64_t id) {
     return tensor;                                                             \
   }
 
-TEMPLATE(MemRef1DU64, uint64_t);
-TEMPLATE(MemRef1DU32, uint32_t);
-TEMPLATE(MemRef1DU16, uint16_t);
-TEMPLATE(MemRef1DU8, uint8_t);
-TEMPLATE(MemRef1DI64, int64_t);
-TEMPLATE(MemRef1DI32, int32_t);
-TEMPLATE(MemRef1DI16, int16_t);
-TEMPLATE(MemRef1DI8, int8_t);
-TEMPLATE(MemRef1DF64, double);
-TEMPLATE(MemRef1DF32, float);
-
 enum OverheadTypeEnum : uint64_t { kU64 = 1, kU32 = 2, kU16 = 3, kU8 = 4 };
 
 enum PrimaryTypeEnum : uint64_t {
@@ -591,19 +599,17 @@ enum PrimaryTypeEnum : uint64_t {
 ///  1 : ptr contains coordinate scheme to assign to new storage
 ///  2 : returns empty coordinate scheme to fill (call back 1 to setup)
 ///  3 : returns coordinate scheme from storage in ptr (call back 1 to convert)
-void *newSparseTensor(uint8_t *abase, uint8_t *adata, uint64_t aoff,
-                      uint64_t asize, uint64_t astride, uint64_t *sbase,
-                      uint64_t *sdata, uint64_t soff, uint64_t ssize,
-                      uint64_t sstride, uint64_t *pbase, uint64_t *pdata,
-                      uint64_t poff, uint64_t psize, uint64_t pstride,
-                      uint64_t ptrTp, uint64_t indTp, uint64_t valTp,
-                      uint32_t action, void *ptr) {
-  assert(astride == 1 && sstride == 1 && pstride == 1);
-  assert(asize == ssize && ssize == psize);
-  uint8_t *sparsity = adata + aoff;
-  uint64_t *sizes = sdata + soff;
-  uint64_t *perm = pdata + poff;
-  uint64_t size = asize;
+void *_mlir_ciface_newSparseTensor(MemRef1DU8 *aref, // NOLINT
+                                   MemRef1DU64 *sref, MemRef1DU64 *pref,
+                                   uint64_t ptrTp, uint64_t indTp,
+                                   uint64_t valTp, uint32_t action, void *ptr) {
+  assert(aref->strides[0] == 1 && sref->strides[0] == 1 &&
+         pref->strides[0] == 1);
+  assert(aref->sizes[0] == sref->sizes[0] && sref->sizes[0] == pref->sizes[0]);
+  const uint8_t *sparsity = aref->data + aref->off;
+  const uint64_t *sizes = sref->data + sref->off;
+  const uint64_t *perm = pref->data + pref->off;
+  uint64_t size = aref->sizes[0];
 
   // Double matrices with all combinations of overhead storage.
   CASE(kU64, kU64, kF64, uint64_t, uint64_t, double);