[Mlir-commits] [mlir] f4abd3e - [mlir] Add std.dynamic_tensor_from_elements bufferization.

Mon Oct 19 15:54:12 PDT 2020

Author: Sean Silva
Date: 2020-10-19T15:51:45-07:00
New Revision: f4abd3ed6d95e66470a0c8cc132e61782896527a

URL: https://github.com/llvm/llvm-project/commit/f4abd3ed6d95e66470a0c8cc132e61782896527a
DIFF: https://github.com/llvm/llvm-project/commit/f4abd3ed6d95e66470a0c8cc132e61782896527a.diff

LOG: [mlir] Add std.dynamic_tensor_from_elements bufferization.

It's unfortunate that this requires adding a dependency on the scf dialect
to std bufferization (and hence to all of the std transforms). This is a bit
perilous. We might want a lib/Transforms/Bufferize/ with a separate
bufferization library per dialect?

Differential Revision: https://reviews.llvm.org/D89667

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
    mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td
    mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp
    mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
    mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h
    mlir/test/Dialect/Standard/bufferize.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
index 5ecc2cae4c15..31fb780cf145 100644

--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
@@ -1755,7 +1755,9 @@ def DynamicTensorFromElementsOp : Std_Op<"dynamic_tensor_from_elements",
 
     The body region defines the tensor's elements. It takes index operands as
     its region arguments that span the index space. The element at the given
-    position is yielded with the `yield` operation (see `YieldOp`).
+    position is yielded with the `yield` operation (see `YieldOp`). There is
+    no defined ordering to the invocations of the body. It is conceptually
+    a "parallel map" operation.
 
     Example:
 

diff  --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td
index ff5a5a63b24b..1ccef1d8f4ea 100644
--- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td
@@ -19,6 +19,7 @@ def ExpandAtomic : FunctionPass<"expand-atomic"> {
 def StdBufferize : FunctionPass<"std-bufferize"> {
   let summary = "Bufferize the std dialect";
   let constructor = "mlir::createStdBufferizePass()";
+  let dependentDialects = ["scf::SCFDialect"];
 }
 
 #endif // MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES

diff  --git a/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp b/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp
index 0ebc97b626c1..e5b71f0fce75 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp
@@ -12,12 +12,67 @@
 
 #include "mlir/Transforms/Bufferize.h"
 #include "PassDetail.h"
+#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/StandardOps/Transforms/Passes.h"
+#include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/Transforms/DialectConversion.h"
 
 using namespace mlir;
 
+namespace {
+class BufferizeDynamicTensorFromElementsOp
+    : public OpConversionPattern<DynamicTensorFromElementsOp> {
+public:
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(DynamicTensorFromElementsOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    // Allocate memory.
+    Location loc = op.getLoc();
+    DynamicTensorFromElementsOp::Adaptor transformed(operands);
+    RankedTensorType tensorType = op.getType().cast<RankedTensorType>();
+    MemRefType memrefType =
+        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+    Value result =
+        rewriter.create<AllocOp>(loc, memrefType, transformed.dynamicExtents());
+
+    // Collect loop bounds.
+    int64_t rank = tensorType.getRank();
+    Value zero = rewriter.create<ConstantIndexOp>(loc, 0);
+    Value one = rewriter.create<ConstantIndexOp>(loc, 1);
+    SmallVector<Value, 4> lowerBounds(rank, zero);
+    SmallVector<Value, 4> steps(rank, one);
+    SmallVector<Value, 4> upperBounds;
+    int nextDynamicIndex = 0;
+    for (int i = 0; i < rank; i++) {
+      Value upperBound =
+          tensorType.isDynamicDim(i)
+              ? transformed.dynamicExtents()[nextDynamicIndex++]
+              : rewriter.create<ConstantIndexOp>(loc, memrefType.getDimSize(i));
+      upperBounds.push_back(upperBound);
+    }
+
+    // Generate tensor elements with a parallel loop.
+    rewriter.create<scf::ParallelOp>(
+        loc, lowerBounds, upperBounds, steps,
+        [&](OpBuilder &b, Location loc, ValueRange ivs) {
+          BlockAndValueMapping mapping;
+          mapping.map(op.body().getArguments(), ivs);
+          for (auto &nestedOp : op.getBody()->without_terminator())
+            b.clone(nestedOp, mapping);
+          auto yieldOp = cast<YieldOp>(op.getBody()->getTerminator());
+          b.create<StoreOp>(loc, mapping.lookup(yieldOp.value()), result, ivs);
+          b.create<scf::YieldOp>(loc);
+        });
+
+    rewriter.replaceOp(op, {result});
+    return success();
+  }
+};
+} // namespace
+
 namespace {
 class BufferizeExtractElementOp : public OpConversionPattern<ExtractElementOp> {
 public:
@@ -73,8 +128,10 @@ class BufferizeTensorFromElementsOp
 void mlir::populateStdBufferizePatterns(MLIRContext *context,
                                         BufferizeTypeConverter &typeConverter,
                                         OwningRewritePatternList &patterns) {
-  patterns.insert<BufferizeExtractElementOp, BufferizeTensorCastOp,
-                  BufferizeTensorFromElementsOp>(typeConverter, context);
+  patterns
+      .insert<BufferizeDynamicTensorFromElementsOp, BufferizeExtractElementOp,
+              BufferizeTensorCastOp, BufferizeTensorFromElementsOp>(
+          typeConverter, context);
 }
 
 namespace {
@@ -86,9 +143,11 @@ struct StdBufferizePass : public StdBufferizeBase<StdBufferizePass> {
     ConversionTarget target(*context);
 
     target.addLegalDialect<StandardOpsDialect>();
+    target.addLegalDialect<scf::SCFDialect>();
 
     populateStdBufferizePatterns(context, typeConverter, patterns);
-    target.addIllegalOp<ExtractElementOp, TensorCastOp, TensorFromElementsOp>();
+    target.addIllegalOp<DynamicTensorFromElementsOp, ExtractElementOp,
+                        TensorCastOp, TensorFromElementsOp>();
 
     if (failed(applyPartialConversion(getFunction(), target, patterns)))
       signalPassFailure();

diff  --git a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
index 182f03e8dd55..abdd05f56387 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
@@ -13,6 +13,7 @@ add_mlir_dialect_library(MLIRStandardOpsTransforms
   LINK_LIBS PUBLIC
   MLIRIR
   MLIRPass
+  MLIRSCF
   MLIRStandard
   MLIRTransforms
   )

diff  --git a/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h b/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h
index 4748bf83ab99..9e3bbbb45462 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h
@@ -9,6 +9,7 @@
 #ifndef DIALECT_STANDARD_TRANSFORMS_PASSDETAIL_H_
 #define DIALECT_STANDARD_TRANSFORMS_PASSDETAIL_H_
 
+#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {

diff  --git a/mlir/test/Dialect/Standard/bufferize.mlir b/mlir/test/Dialect/Standard/bufferize.mlir
index d16a5dd6d9d4..1ba092aa127c 100644
--- a/mlir/test/Dialect/Standard/bufferize.mlir
+++ b/mlir/test/Dialect/Standard/bufferize.mlir
@@ -1,5 +1,54 @@
 // RUN: mlir-opt %s -std-bufferize | FileCheck %s
 
+// CHECK-LABEL:   func @dynamic_tensor_from_elements(
+// CHECK-SAME:                                       %[[ARG:.*]]: tensor<*xf32>,
+// CHECK-SAME:                                       %[[DYNAMIC_EXTENT:.*]]: index) -> tensor<?xindex> {
+// CHECK:           %[[MEMREF:.*]] = alloc(%[[DYNAMIC_EXTENT]]) : memref<?xindex>
+// CHECK:           %[[C0:.*]] = constant 0 : index
+// CHECK:           %[[C1:.*]] = constant 1 : index
+// CHECK:           scf.parallel (%[[I:.*]]) = (%[[C0]]) to (%[[DYNAMIC_EXTENT]]) step (%[[C1]]) {
+// CHECK:             %[[ELEM:.*]] = dim %[[ARG]], %[[I]] : tensor<*xf32>
+// CHECK:             store %[[ELEM]], %[[MEMREF]][%[[I]]] : memref<?xindex>
+// CHECK:             scf.yield
+// CHECK:           }
+// CHECK:           %[[RET:.*]] = tensor_load %[[MEMREF]] : memref<?xindex>
+// CHECK:           return %[[RET]] : tensor<?xindex>
+// CHECK:         }
+func @dynamic_tensor_from_elements(%arg: tensor<*xf32>, %rank: index) -> tensor<?xindex> {
+  %result = dynamic_tensor_from_elements %rank {
+  ^bb0(%i : index):
+    %elem = dim %arg, %i : tensor<*xf32>
+    yield %elem : index
+  } : tensor<?xindex>
+  return %result : tensor<?xindex>
+}
+
+// Additional test that checks the logic for intermixed static and dynamic
+// extents.
+//
+// CHECK-LABEL:   func @dynamic_tensor_from_elements_static_and_dynamic(
+// CHECK-SAME:                                                          %[[DYNAMIC_EXTENT:.*]]: index) -> tensor<16x?xindex> {
+// CHECK:           %[[MEMREF:.*]] = alloc(%[[DYNAMIC_EXTENT]]) : memref<16x?xindex>
+// CHECK:           %[[C0:.*]] = constant 0 : index
+// CHECK:           %[[C1:.*]] = constant 1 : index
+// CHECK:           %[[C16:.*]] = constant 16 : index
+// CHECK:           scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[C0]], %[[C0]]) to (%[[C16]], %[[DYNAMIC_EXTENT]]) step (%[[C1]], %[[C1]]) {
+// CHECK:             %[[VAL_7:.*]] = addi %[[I]], %[[J]] : index
+// CHECK:             store %[[VAL_7]], %[[MEMREF]][%[[I]], %[[J]]] : memref<16x?xindex>
+// CHECK:             scf.yield
+// CHECK:           }
+// CHECK:           %[[RET:.*]] = tensor_load %[[MEMREF]] : memref<16x?xindex>
+// CHECK:           return %[[RET]] : tensor<16x?xindex>
+// CHECK:         }
+func @dynamic_tensor_from_elements_static_and_dynamic(%arg0: index) -> tensor<16x?xindex> {
+  %result = dynamic_tensor_from_elements %arg0 {
+  ^bb0(%i: index, %j: index):
+    %sum = addi %i, %j : index
+    yield %sum : index
+  } : tensor<16x?xindex>
+  return %result : tensor<16x?xindex>
+}
+
 // CHECK-LABEL:   func @extract_element(
 // CHECK-SAME:                          %[[TENSOR:.*]]: tensor<?xf32>,
 // CHECK-SAME:                          %[[IDX:.*]]: index) -> f32 {