[Mlir-commits] [mlir] 6121117 - [mlir][Linalg] Fix TensorConstantOp bufferization in Linalg.
Nicolas Vasilache
llvmlistbot at llvm.org
Tue Oct 13 09:37:29 PDT 2020
Author: Nicolas Vasilache
Date: 2020-10-13T16:36:56Z
New Revision: 6121117484ddd7c5a03b40004a8bba58506ce9d0
URL: https://github.com/llvm/llvm-project/commit/6121117484ddd7c5a03b40004a8bba58506ce9d0
DIFF: https://github.com/llvm/llvm-project/commit/6121117484ddd7c5a03b40004a8bba58506ce9d0.diff
LOG: [mlir][Linalg] Fix TensorConstantOp bufferization in Linalg.
TensorConstantOp bufferization currently uses the vector dialect to store constant data into memory.
Due to natural vector size and alignment properties, this is problematic for n-D (n > 1) vectors whose most minor dimension is not naturally aligned.
Instead, this revision linearizes the constant and introduces a linalg.reshape to go back to the desired shape.
Still, this is to be considered a workaround and a better long-term solution will probably involve `llvm.global`.
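For illustration, under the new scheme a rank-2 constant such as `tensor<2x3xf32>` bufferizes to roughly the IR below (a sketch mirroring the updated tensors-to-buffers.mlir test in this diff; SSA names are illustrative and the reassociation map is elided as `[...]`):

  %alloc = alloc() : memref<vector<6xf32>>
  %cst = constant dense<[0.0, 1.0, 2.0, 3.0, 4.0, 5.0]> : vector<6xf32>
  store %cst, %alloc[] : memref<vector<6xf32>>
  // View the 0-D memref of vector<6xf32> as a flat 1-D memref.
  %flat = vector.type_cast %alloc : memref<vector<6xf32>> to memref<6xf32>
  // Expand the flat buffer back to the desired 2x3 shape.
  %res = linalg.reshape %flat [...] : memref<6xf32> into memref<2x3xf32>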
Differential Revision: https://reviews.llvm.org/D89311
Added:
Modified:
mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir
mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
mlir/test/Dialect/Linalg/tensors-to-buffers.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index a2dee8c3ae65..948883e89e1e 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -816,6 +816,9 @@ class LinalgOpConverter : public BufferAssignmentConversionPattern {
ConversionPatternRewriter &rewriter) const final;
};
+/// TensorConstantOp conversion inserts a linearized 1-D vector constant that is
+/// stored in memory. A linalg.reshape is introduced to convert to the desired
+/// n-D buffer form.
class TensorConstantOpConverter
: public BufferAssignmentOpConversionPattern<ConstantOp> {
public:
@@ -827,6 +830,7 @@ class TensorConstantOpConverter
ConversionPatternRewriter &rewriter) const final;
};
+/// TensorCastOp converts 1-1 to MemRefCastOp.
class TensorCastOpConverter
: public BufferAssignmentOpConversionPattern<TensorCastOp> {
public:
diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir
index b706c9cef976..a06874550f56 100644
--- a/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir
+++ b/mlir/integration_test/Dialect/Linalg/CPU/test-tensor-matmul.mlir
@@ -4,12 +4,13 @@
// RUN: | FileCheck %s
func @main() {
- %A = constant dense<[[1.0, 2.0], [4.0, 5.0]]> : tensor<2x2xf32>
+ %A = constant dense<[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]> : tensor<2x3xf32>
%B = constant dense<[[1.0, 2.0, 3.0, 4.0],
- [5.0, 6.0, 7.0, 8.0]]> : tensor<2x4xf32>
+ [5.0, 6.0, 7.0, 8.0],
+ [9.0, 10.0, 11.0, 12.0]]> : tensor<3x4xf32>
%C = constant dense<1000.0> : tensor<2x4xf32>
- %D = linalg.matmul ins(%A, %B: tensor<2x2xf32>, tensor<2x4xf32>)
+ %D = linalg.matmul ins(%A, %B: tensor<2x3xf32>, tensor<3x4xf32>)
init(%C: tensor<2x4xf32>) -> tensor<2x4xf32>
%unranked = tensor_cast %D : tensor<2x4xf32> to tensor<*xf32>
@@ -17,10 +18,11 @@ func @main() {
// CHECK: Unranked Memref base@ = {{0x[-9a-f]*}}
// CHECK-SAME: rank = 2 offset = 0 sizes = [2, 4] strides = [4, 1] data =
- // CHECK-NEXT: [1011, 1014, 1017, 1020]
- // CHECK-NEXT: [1029, 1038, 1047, 1056]
+ // CHECK-NEXT: [1038, 1044, 1050, 1056]
+ // CHECK-NEXT: [1083, 1098, 1113, 1128]
return
}
func @print_memref_f32(%ptr : tensor<*xf32>)
+
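The updated CHECK values can be verified by hand from D = C + A * B with C filled with 1000: for example, D[0][0] = 1000 + 1*1 + 2*5 + 3*9 = 1038 and D[1][3] = 1000 + 4*4 + 5*8 + 6*12 = 1128.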
diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
index 650c44f9e922..303624f2898d 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp
@@ -239,22 +239,43 @@ LogicalResult mlir::linalg::LinalgOpConverter::matchAndRewrite(
LogicalResult mlir::linalg::TensorConstantOpConverter::matchAndRewrite(
ConstantOp op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const {
- if (!op.getType().isa<RankedTensorType>())
+ RankedTensorType rankedTensorType = op.getType().dyn_cast<RankedTensorType>();
+ if (!rankedTensorType)
+ return failure();
+ if (llvm::any_of(rankedTensorType.getShape(), [](int64_t s) {
+ return s == 0 || ShapedType::isDynamic(s);
+ }))
return failure();
- auto attr = op.getValue().cast<DenseElementsAttr>();
- Location loc = op.getLoc();
+ int64_t nElements = 1;
+ for (int64_t s : rankedTensorType.getShape())
+ nElements *= s;
+ Type elementType = rankedTensorType.getElementType();
MemRefType memrefType =
converter.convertType(op.getType()).cast<MemRefType>();
- VectorType vectorType =
- VectorType::get(memrefType.getShape(), memrefType.getElementType());
- Value cstVec =
- rewriter.create<ConstantOp>(loc, vectorType, attr.reshape(vectorType));
+ VectorType flatVectorType = VectorType::get({nElements}, elementType);
+ MemRefType memrefOfFlatVectorType = MemRefType::get({}, flatVectorType);
+ MemRefType flatMemrefType = MemRefType::get({nElements}, elementType);
- MemRefType memrefOfVectorType = MemRefType::get({}, vectorType);
- Value alloc = rewriter.create<AllocOp>(loc, memrefOfVectorType, ValueRange{});
+ Location loc = op.getLoc();
+ auto attr = op.getValue().cast<DenseElementsAttr>();
+ Value alloc =
+ rewriter.create<AllocOp>(loc, memrefOfFlatVectorType, ValueRange{});
+ Value cstVec = rewriter.create<ConstantOp>(loc, flatVectorType,
+ attr.reshape(flatVectorType));
rewriter.create<StoreOp>(loc, cstVec, alloc);
- rewriter.replaceOpWithNewOp<vector::TypeCastOp>(op, memrefType, alloc);
+
+ Value memref =
+ rewriter.create<vector::TypeCastOp>(loc, flatMemrefType, alloc);
+ if (rankedTensorType.getRank() > 1) {
+ // Introduce a linalg.reshape to expand the flat memref back to the desired n-D shape.
+ AffineMap collapseAllDims = AffineMap::getMultiDimIdentityMap(
+ /*numDims=*/rankedTensorType.getRank(), op.getContext());
+ memref = rewriter.create<linalg::ReshapeOp>(
+ loc, memrefType, memref,
+ rewriter.getAffineMapArrayAttr(collapseAllDims));
+ }
+ rewriter.replaceOp(op, memref);
return success();
}
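A note on the reassociation attribute above: for rank 2, `getMultiDimIdentityMap` yields the identity map `(d0, d1) -> (d0, d1)`, and wrapping it as the single entry of the array attribute groups all dimensions of the n-D type into the one dimension of the flat buffer. Used in the expanding direction here, the emitted op looks roughly like the sketch below (the exact printed form is elided in the test via `{{.*}}`):

  %res = linalg.reshape %flat [affine_map<(d0, d1) -> (d0, d1)>]
      : memref<6xf32> into memref<2x3xf32>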
diff --git a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir
index 4d23b7e10dae..093732c7f47d 100644
--- a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir
+++ b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir
@@ -126,28 +126,29 @@ func @dynamic_results(%arg0: tensor<?x?xf32>)
// -----
-func @foo() -> tensor<4xf32> {
+func @foo() -> tensor<2x3xf32> {
// CHECK-LABEL: func @foo(
-// CHECK-SAME: %[[A:[0-9a-z]*]]: memref<4xf32>) {
-
- %0 = constant dense<[0.0, 1.0, 2.0, 3.0]> : tensor<4xf32>
-// CHECK-NEXT: %[[CST:.*]] = constant dense<[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00]> : vector<4xf32>
-// CHECK-NEXT: %[[ALLOC:.*]] = alloc() : memref<vector<4xf32>>
-// CHECK-NEXT: store %[[CST]], %[[ALLOC]][] : memref<vector<4xf32>>
-// CHECK-NEXT: %[[RES:.*]] = vector.type_cast %[[ALLOC]] : memref<vector<4xf32>> to memref<4xf32>
-
- return %0 : tensor<4xf32>
-// CHECK-NEXT: linalg.copy(%[[RES]], %[[A]]) : memref<4xf32>, memref<4xf32>
-// CHECK-NEXT: dealloc %[[ALLOC]] : memref<vector<4xf32>>
+// CHECK-SAME: %[[A:[0-9a-z]*]]: memref<2x3xf32>) {
+
+ %0 = constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32>
+// CHECK-NEXT: %[[ALLOC:.*]] = alloc() : memref<vector<6xf32>>
+// CHECK-NEXT: %[[CST:.*]] = constant dense<[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00]> : vector<6xf32>
+// CHECK-NEXT: store %[[CST]], %[[ALLOC]][] : memref<vector<6xf32>>
+// CHECK-NEXT: %[[FLAT:.*]] = vector.type_cast %[[ALLOC]] : memref<vector<6xf32>> to memref<6xf32>
+// CHECK-NEXT: %[[RES:.*]] = linalg.reshape %[[FLAT]] {{.*}} : memref<6xf32> into memref<2x3xf32>
+
+ return %0 : tensor<2x3xf32>
+// CHECK-NEXT: linalg.copy(%[[RES]], %[[A]]) : memref<2x3xf32>, memref<2x3xf32>
+// CHECK-NEXT: dealloc %[[ALLOC]] : memref<vector<6xf32>>
// CHECK-NEXT: return
}
func @bar() {
// CHECK-LABEL: func @bar() {
- %0 = call @foo() : () -> tensor<4xf32>
-// CHECK-NEXT: %[[ALLOC:.*]] = alloc() : memref<4xf32>
-// CHECK-NEXT: call @foo(%[[ALLOC]]) : (memref<4xf32>) -> ()
+ %0 = call @foo() : () -> tensor<2x3xf32>
+// CHECK-NEXT: %[[ALLOC:.*]] = alloc() : memref<2x3xf32>
+// CHECK-NEXT: call @foo(%[[ALLOC]]) : (memref<2x3xf32>) -> ()
// Instead of relying on tensor_store which introduces aliasing, we rely on
// the conversion of print_memref_f32(tensor<*xf32>) to
@@ -155,15 +156,15 @@ func @bar() {
// Note that this is skipping a step and we would need at least some function
// attribute to declare that this conversion is valid (e.g. when we statically
// know that things will play nicely at the C ABI boundary).
- %unranked = tensor_cast %0 : tensor<4xf32> to tensor<*xf32>
+ %unranked = tensor_cast %0 : tensor<2x3xf32> to tensor<*xf32>
// CHECK-NEXT: %[[UNRANKED:.*]] = memref_cast %[[ALLOC]] :
-// CHECK-SAME: memref<4xf32> to memref<*xf32>
+// CHECK-SAME: memref<2x3xf32> to memref<*xf32>
call @print_memref_f32(%unranked) : (tensor<*xf32>) -> ()
// CHECK-NEXT: call @print_memref_f32(%[[UNRANKED]]) : (memref<*xf32>) -> ()
return
-// CHECK-NEXT: dealloc %[[ALLOC]] : memref<4xf32>
+// CHECK-NEXT: dealloc %[[ALLOC]] : memref<2x3xf32>
// CHECK-NEXT: return
}