[Mlir-commits] [mlir] e939644 - [mlir][memref] Implement lowering of memref.copy to llvm

Stephan Herhut llvmlistbot at llvm.org
Mon Jun 28 05:52:16 PDT 2021


Author: Stephan Herhut
Date: 2021-06-28T14:52:07+02:00
New Revision: e9396449777f02d573deb25d603ee1b1d6e568c1

URL: https://github.com/llvm/llvm-project/commit/e9396449777f02d573deb25d603ee1b1d6e568c1
DIFF: https://github.com/llvm/llvm-project/commit/e9396449777f02d573deb25d603ee1b1d6e568c1.diff

LOG: [mlir][memref] Implement lowering of memref.copy to llvm

This lowering uses a library call to implement copying in the general case, i.e.,
supporting arbitrary rank and strided layouts.

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
    mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
    mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
    mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
    mlir/lib/ExecutionEngine/CRunnerUtils.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
index 7efff9774cd50..6380ff2d8e132 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
@@ -45,6 +45,8 @@ LLVM::LLVMFuncOp lookupOrCreateMallocFn(ModuleOp moduleOp, Type indexType);
 LLVM::LLVMFuncOp lookupOrCreateAlignedAllocFn(ModuleOp moduleOp,
                                               Type indexType);
 LLVM::LLVMFuncOp lookupOrCreateFreeFn(ModuleOp moduleOp);
+LLVM::LLVMFuncOp lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType,
+                                            Type unrankedDescriptorType);
 
 /// Create a FuncOp with signature `resultType`(`paramTypes`)` and name `name`.
 LLVM::LLVMFuncOp lookupOrCreateFn(ModuleOp moduleOp, StringRef name,

diff  --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
index fb0b2a65a67eb..bd855fcc03a96 100644
--- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
+++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
@@ -330,6 +330,13 @@ class DynamicMemRefType {
   const int64_t *strides;
 };
 
+//===----------------------------------------------------------------------===//
+// Small runtime support library for memref.copy lowering during codegen.
+//===----------------------------------------------------------------------===//
+extern "C" MLIR_CRUNNERUTILS_EXPORT void
+memrefCopy(int64_t elemSize, UnrankedMemRefType<char> *src,
+           UnrankedMemRefType<char> *dst);
+
 //===----------------------------------------------------------------------===//
 // Small runtime support library for vector.print lowering during codegen.
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index db5918e95f182..eb390bf8844fa 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -2618,6 +2618,68 @@ struct MemRefCastOpLowering : public ConvertOpToLLVMPattern<memref::CastOp> {
   }
 };
 
+struct MemRefCopyOpLowering : public ConvertOpToLLVMPattern<memref::CopyOp> {
+  using ConvertOpToLLVMPattern<memref::CopyOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::CopyOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = op.getLoc();
+    memref::CopyOp::Adaptor adaptor(operands);
+    auto srcType = op.source().getType().cast<BaseMemRefType>();
+    auto targetType = op.target().getType().cast<BaseMemRefType>();
+
+    // First make sure we have an unranked memref descriptor representation.
+    auto makeUnranked = [&, this](Value ranked, BaseMemRefType type) {
+      auto rank = rewriter.create<LLVM::ConstantOp>(
+          loc, getIndexType(), rewriter.getIndexAttr(type.getRank()));
+      auto *typeConverter = getTypeConverter();
+      auto ptr =
+          typeConverter->promoteOneMemRefDescriptor(loc, ranked, rewriter);
+      auto voidPtr =
+          rewriter.create<LLVM::BitcastOp>(loc, getVoidPtrType(), ptr)
+              .getResult();
+      auto unrankedType =
+          UnrankedMemRefType::get(type.getElementType(), type.getMemorySpace());
+      return UnrankedMemRefDescriptor::pack(rewriter, loc, *typeConverter,
+                                            unrankedType,
+                                            ValueRange{rank, voidPtr});
+    };
+
+    Value unrankedSource = srcType.hasRank()
+                               ? makeUnranked(adaptor.source(), srcType)
+                               : adaptor.source();
+    Value unrankedTarget = targetType.hasRank()
+                               ? makeUnranked(adaptor.target(), targetType)
+                               : adaptor.target();
+
+    // Now promote the unranked descriptors to the stack.
+    auto one = rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
+                                                 rewriter.getIndexAttr(1));
+    auto promote = [&](Value desc) {
+      auto ptrType = LLVM::LLVMPointerType::get(desc.getType());
+      auto allocated =
+          rewriter.create<LLVM::AllocaOp>(loc, ptrType, ValueRange{one});
+      rewriter.create<LLVM::StoreOp>(loc, desc, allocated);
+      return allocated;
+    };
+
+    auto sourcePtr = promote(unrankedSource);
+    auto targetPtr = promote(unrankedTarget);
+
+    auto elemSize = rewriter.create<LLVM::ConstantOp>(
+        loc, getIndexType(),
+        rewriter.getIndexAttr(srcType.getElementTypeBitWidth() / 8));
+    auto copyFn = LLVM::lookupOrCreateMemRefCopyFn(
+        op->getParentOfType<ModuleOp>(), getIndexType(), sourcePtr.getType());
+    rewriter.create<LLVM::CallOp>(loc, copyFn,
+                                  ValueRange{elemSize, sourcePtr, targetPtr});
+    rewriter.eraseOp(op);
+
+    return success();
+  }
+};
+
 /// Extracts allocated, aligned pointers and offset from a ranked or unranked
 /// memref type. In unranked case, the fields are extracted from the underlying
 /// ranked descriptor.
@@ -4009,6 +4071,7 @@ void mlir::populateStdToLLVMMemoryConversionPatterns(
       GetGlobalMemrefOpLowering,
       LoadOpLowering,
       MemRefCastOpLowering,
+      MemRefCopyOpLowering,
       MemRefReinterpretCastOpLowering,
       MemRefReshapeOpLowering,
       RankOpLowering,

diff  --git a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
index a43c2251c2d99..47a5851b51f2e 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
@@ -35,6 +35,7 @@ static constexpr llvm::StringRef kPrintNewline = "printNewline";
 static constexpr llvm::StringRef kMalloc = "malloc";
 static constexpr llvm::StringRef kAlignedAlloc = "aligned_alloc";
 static constexpr llvm::StringRef kFree = "free";
+static constexpr llvm::StringRef kMemRefCopy = "memref_copy";
 
 /// Generic print function lookupOrCreate helper.
 LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFn(ModuleOp moduleOp, StringRef name,
@@ -114,6 +115,15 @@ LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFreeFn(ModuleOp moduleOp) {
       LLVM::LLVMVoidType::get(moduleOp->getContext()));
 }
 
+LLVM::LLVMFuncOp
+mlir::LLVM::lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType,
+                                       Type unrankedDescriptorType) {
+  return LLVM::lookupOrCreateFn(
+      moduleOp, kMemRefCopy,
+      ArrayRef<Type>{indexType, unrankedDescriptorType, unrankedDescriptorType},
+      LLVM::LLVMVoidType::get(moduleOp->getContext()));
+}
+
 Operation::result_range mlir::LLVM::createLLVMCall(OpBuilder &b, Location loc,
                                                    LLVM::LLVMFuncOp fn,
                                                    ValueRange paramTypes,

diff  --git a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp
index e5b682a7b6de5..bf96afb73725b 100644
--- a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp
+++ b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp
@@ -18,8 +18,10 @@
 #include <sys/time.h>
 #endif // _WIN32
 
+#include <alloca.h>
 #include <cinttypes>
 #include <cstdio>
+#include <string.h>
 
 #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
 
@@ -36,6 +38,52 @@ extern "C" void printClose() { fputs(" )", stdout); }
 extern "C" void printComma() { fputs(", ", stdout); }
 extern "C" void printNewline() { fputc('\n', stdout); }
 
+extern "C" MLIR_CRUNNERUTILS_EXPORT void
+memrefCopy(int64_t elemSize, UnrankedMemRefType<char> *srcArg,
+           UnrankedMemRefType<char> *dstArg) {
+  DynamicMemRefType<char> src(*srcArg);
+  DynamicMemRefType<char> dst(*dstArg);
+
+  int64_t rank = src.rank;
+  int64_t *indices = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank));
+  int64_t *srcStrides = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank));
+  int64_t *dstStrides = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank));
+
+  char *srcPtr = src.data + src.offset * elemSize;
+  char *dstPtr = dst.data + dst.offset * elemSize;
+
+  // Initialize index and scale strides.
+  for (int rankp = 0; rankp < rank; ++rankp) {
+    indices[rankp] = 0;
+    srcStrides[rankp] = src.strides[rankp] * elemSize;
+    dstStrides[rankp] = dst.strides[rankp] * elemSize;
+  }
+
+  int64_t readIndex = 0, writeIndex = 0;
+  for (;;) {
+    // Copy over the element, byte by byte.
+    memcpy(dstPtr + writeIndex, srcPtr + readIndex, elemSize);
+    // Advance index and read position.
+    for (int64_t axis = rank - 1; axis >= 0; --axis) {
+      // Advance at current axis.
+      auto newIndex = ++indices[axis];
+      readIndex += srcStrides[axis];
+      writeIndex += dstStrides[axis];
+      // If this is a valid index, we have our next index, so continue copying.
+      if (src.sizes[axis] != newIndex)
+        break;
+      // We reached the end of this axis. If this is axis 0, we are done.
+      if (axis == 0)
+        return;
+      // Else, reset to 0 and undo the advancement of the linear index that
+      // this axis had. The continue with the axis one outer.
+      indices[axis] = 0;
+      readIndex -= src.sizes[axis] * srcStrides[axis];
+      writeIndex -= dst.sizes[axis] * dstStrides[axis];
+    }
+  }
+}
+
 /// Prints GFLOPS rating.
 extern "C" void print_flops(double flops) {
   fprintf(stderr, "%lf GFLOPS\n", flops / 1.0E9);


        


More information about the Mlir-commits mailing list