[Mlir-commits] [mlir] c0b0b6a - [mlir][bufferize] Infer memory space in all bufferization patterns

Mon Jun 27 07:33:25 PDT 2022

Author: Matthias Springer
Date: 2022-06-27T16:32:52+02:00
New Revision: c0b0b6a00a2c54a2c5fdc5626d240351021def5f

URL: https://github.com/llvm/llvm-project/commit/c0b0b6a00a2c54a2c5fdc5626d240351021def5f
DIFF: https://github.com/llvm/llvm-project/commit/c0b0b6a00a2c54a2c5fdc5626d240351021def5f.diff

LOG: [mlir][bufferize] Infer memory space in all bufferization patterns

This change updates all remaining bufferization patterns (except for scf.while) and the remaining bufferization infrastructure to infer the memory space whenever possible instead of falling back to "0". (If a default memory space is set in the bufferization options, we still fall back to that value if the memory space could not be inferred.)

Differential Revision: https://reviews.llvm.org/D128423

Added: 
    mlir/test/Dialect/Arithmetic/one-shot-bufferize-memory-space-invalid.mlir
    mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space-invalid.mlir
    mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir
    mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir

Modified: 
    mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td
    mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
    mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
    mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp
    mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
    mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
    mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
    mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
    mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
    mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-memory-space-invalid.mlir
    mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
    mlir/test/Dialect/SCF/one-shot-bufferize.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td
index 8f739d758a91d..61caa18561d34 100644

--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td
@@ -355,6 +355,25 @@ def BufferizableOpInterface : OpInterface<"BufferizableOpInterface"> {
           return bufferization::getMemRefType(tensorType, options);
         }]
       >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Return the memory space of the given tensor OpResult if specified on
+          this op. If not specified, return `failure`.
+
+          This method will never be called with OpResults that do not bufferize
+          to a memory allocation.
+        }],
+        /*retType=*/"FailureOr<unsigned>",
+        /*methodName=*/"getMemorySpace",
+        /*args=*/(ins "OpResult":$opResult),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          assert(cast<BufferizableOpInterface>($_op.getOperation())
+                     .bufferizesToAllocation(opResult)
+                 && "expected allocation");
+          return failure();
+        }]
+      >,
   ];
 
   let extraClassDeclaration = [{

diff  --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
index 81dadee852e3e..df93e0eeb5dff 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
@@ -47,8 +47,9 @@ def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor",
     another op.
 
     The optional `memory_space` attribute specifies the memory space when
-    bufferizing this op. If `memory_space` is not specified, the default memory
-    space is used during bufferization.
+    bufferizing this op. The memory space is inferred from `copy` if specified.
+    If neither `copy` nor `memory_space` is specified, the default memory space
+    is used during bufferization.
 
     Both dense and sparse tensor types are supported. The result of a
     `bufferization.alloc_tensor` is a tensor value that can be used like any
@@ -81,6 +82,12 @@ def Bufferization_AllocTensorOp : Bufferization_Op<"alloc_tensor",
 
     bool bufferizesToAllocation(OpResult opResult) { return true; }
 
+    FailureOr<unsigned> getMemorySpace(OpResult opResult) {
+      if (getMemorySpace().hasValue())
+        return static_cast<unsigned>(*getMemorySpace());
+      return failure();
+    }
+
     bool bufferizesToMemoryRead(OpOperand &opOperand,
                                 const AnalysisState &state);
 

diff  --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 6cf0dfa7ff24d..97684fb4a9310 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -347,6 +347,10 @@ def TensorCopyInsertion : Pass<"tensor-copy-insertion"> {
            "Bufferize function boundaries (experimental).">,
     Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true",
            "Specify if new allocations should be deallocated.">,
+    Option<"mustInferMemorySpace", "must-infer-memory-space", "bool",
+           /*default=*/"false",
+           "The memory space of an memref types must always be inferred. If "
+           "unset, a default memory space of 0 is used otherwise.">,
   ];
   let constructor = "mlir::bufferization::createTensorCopyInsertionPass()";
 }

diff  --git a/mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp
index 24657bf3558bf..abfed7d5adc6d 100644
--- a/mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -26,6 +26,11 @@ struct ConstantOpInterface
                           const BufferizationOptions &options) const {
     auto constantOp = cast<arith::ConstantOp>(op);
 
+    // TODO: Implement memory space for this op. E.g., by adding a memory_space
+    // attribute to ConstantOp.
+    if (options.defaultMemorySpace != static_cast<unsigned>(0))
+      return op->emitError("memory space not implemented yet");
+
     // Only ranked tensors are supported.
     if (!constantOp.getType().isa<RankedTensorType>())
       return failure();
@@ -150,6 +155,10 @@ struct SelectOpInterface
       return failure();
     Value trueBuffer = *maybeTrueBuffer;
     Value falseBuffer = *maybeFalseBuffer;
+    BaseMemRefType trueType = trueBuffer.getType().cast<BaseMemRefType>();
+    BaseMemRefType falseType = falseBuffer.getType().cast<BaseMemRefType>();
+    if (trueType.getMemorySpaceAsInt() != falseType.getMemorySpaceAsInt())
+      return op->emitError("inconsistent memory space on true/false operands");
 
     // The "true" and the "false" operands must have the same type. If the
     // buffers have different types, they differ only in their layout map. Cast

diff  --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 6073c931e53ae..374fbd7da664d 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -43,6 +43,13 @@ using namespace bufferization;
 constexpr const ::llvm::StringLiteral
     bufferization::BufferizableOpInterface::kInplaceableAttrName;
 
+/// Return the owner of the given value.
+static Operation *getOwnerOfValue(Value value) {
+  if (auto opResult = value.dyn_cast<OpResult>())
+    return opResult.getDefiningOp();
+  return value.cast<BlockArgument>().getOwner()->getParentOp();
+}
+
 /// Create an AllocTensorOp for the given shaped value. If `copy` is set, the
 /// shaped value is copied. Otherwise, a tensor with undefined contents is
 /// allocated.
@@ -84,10 +91,21 @@ FailureOr<Value> bufferization::allocateTensorForShapedValue(
       populateDynamicDimSizes(b, loc, tensor, dynamicSizes);
   }
 
+  // Create AllocTensorOp.
   auto allocTensorOp = b.create<AllocTensorOp>(loc, tensorType, dynamicSizes,
                                                copy ? tensor : Value());
   allocTensorOp->setAttr(BufferizationDialect::kEscapeAttrName,
                          b.getBoolArrayAttr({escape}));
+
+  // Add 'memory_space' attribute. Not needed if 'copy' operand is specified.
+  if (copy)
+    return allocTensorOp.getResult();
+  FailureOr<BaseMemRefType> copyBufferType = getBufferType(tensor, options);
+  if (failed(copyBufferType))
+    return failure();
+  allocTensorOp.setMemorySpaceAttr(
+      b.getIntegerAttr(b.getIntegerType(64, /*isSigned=*/false),
+                       copyBufferType->getMemorySpaceAsInt()));
   return allocTensorOp.getResult();
 }
 
@@ -512,16 +530,43 @@ FailureOr<BaseMemRefType>
 bufferization::getBufferType(Value value, const BufferizationOptions &options) {
   auto tensorType = value.getType().dyn_cast<TensorType>();
   assert(tensorType && "unexpected non-tensor type");
+  Operation *op = getOwnerOfValue(value);
 
+  // ToTensorOp: Take buffer type directly from the op.
   if (auto toTensorOp = value.getDefiningOp<bufferization::ToTensorOp>())
     return toTensorOp.getMemref().getType().cast<BaseMemRefType>();
 
+  // If value is a bbArg of a bufferizable op: query op interface.
   if (auto bbArg = value.dyn_cast<BlockArgument>())
     if (auto bufferizableOp =
             options.dynCastBufferizableOp(bbArg.getOwner()->getParentOp()))
       return bufferizableOp.getBufferType(bbArg, options);
 
-  return getMemRefType(tensorType, options);
+  // Check value is a new buffer allocation with a memory space attribute. In
+  // that case we can at least infer the memory space.
+  Optional<unsigned> memorySpace = None;
+  if (auto opResult = value.dyn_cast<OpResult>()) {
+    if (auto bufferizableOp =
+            options.dynCastBufferizableOp(opResult.getDefiningOp())) {
+      if (bufferizableOp.bufferizesToAllocation(opResult)) {
+        FailureOr<unsigned> queriedMemorySpace =
+            bufferizableOp.getMemorySpace(opResult);
+        if (!failed(queriedMemorySpace))
+          memorySpace = *queriedMemorySpace;
+      }
+    }
+  }
+
+  // If we still do not know the memory space, use the default memory space (if
+  // any).
+  if (!memorySpace.hasValue())
+    memorySpace = options.defaultMemorySpace;
+
+  // If we still do not know the memory space, report a failure.
+  if (!memorySpace.hasValue())
+    return op->emitError("could not infer memory space");
+
+  return getMemRefType(tensorType, options, /*layout=*/{}, *memorySpace);
 }
 
 void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter,

diff  --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index 188c08b674c0e..679c4e8bbba3f 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -176,6 +176,9 @@ LogicalResult AllocTensorOp::bufferize(RewriterBase &rewriter,
   unsigned memorySpace;
   if (getMemorySpace().hasValue()) {
     memorySpace = *getMemorySpace();
+  } else if (getCopy()) {
+    memorySpace =
+        copyBuffer.getType().cast<BaseMemRefType>().getMemorySpaceAsInt();
   } else if (options.defaultMemorySpace.hasValue()) {
     memorySpace = *options.defaultMemorySpace;
   } else {

diff  --git a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
index cb320dc92f470..786f12f389f4b 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
@@ -105,6 +105,8 @@ struct TensorCopyInsertionPass
       options.allowReturnAllocs = allowReturnAllocs;
       options.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries;
       options.createDeallocs = createDeallocs;
+      if (mustInferMemorySpace)
+        options.defaultMemorySpace = None;
       if (failed(insertTensorCopies(getOperation(), options)))
         signalPassFailure();
     }

diff  --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
index 0bf4fd3405dd4..3af3e5dc59533 100644
--- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -707,6 +707,8 @@ struct WhileOpInterface
     return success();
   }
 
+  // TODO: Implement getBufferType interface method and infer buffer types.
+
   LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                           const BufferizationOptions &options) const {
     auto whileOp = cast<scf::WhileOp>(op);

diff  --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index 6f24843522103..4d15aced2830a 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -379,6 +379,10 @@ struct FromElementsOpInterface
                           const BufferizationOptions &options) const {
     auto fromElementsOp = cast<tensor::FromElementsOp>(op);
 
+    // TODO: Implement memory space for this op.
+    if (options.defaultMemorySpace != static_cast<unsigned>(0))
+      return op->emitError("memory space not implemented yet");
+
     // Allocate a buffer for the result.
     Location loc = op->getLoc();
     auto tensorType = fromElementsOp.getType().cast<RankedTensorType>();
@@ -435,6 +439,11 @@ struct GenerateOpInterface
   LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                           const BufferizationOptions &options) const {
     auto generateOp = cast<tensor::GenerateOp>(op);
+
+    // TODO: Implement memory space for this op.
+    if (options.defaultMemorySpace != static_cast<unsigned>(0))
+      return op->emitError("memory space not implemented yet");
+
     auto tensorType = generateOp.getType().cast<RankedTensorType>();
     // Allocate memory.
     Location loc = op->getLoc();
@@ -792,7 +801,9 @@ struct ReshapeOpInterface
     if (failed(srcBuffer) || failed(shapeBuffer))
       return failure();
     auto resultTensorType = reshapeOp.getResult().getType().cast<TensorType>();
-    auto resultMemRefType = getMemRefType(resultTensorType, options);
+    auto resultMemRefType = getMemRefType(
+        resultTensorType, options, /*layout=*/{},
+        srcBuffer->getType().cast<BaseMemRefType>().getMemorySpaceAsInt());
     replaceOpWithNewBufferizedOp<memref::ReshapeOp>(
         rewriter, op, resultMemRefType, *srcBuffer, *shapeBuffer);
     return success();

diff  --git a/mlir/test/Dialect/Arithmetic/one-shot-bufferize-memory-space-invalid.mlir b/mlir/test/Dialect/Arithmetic/one-shot-bufferize-memory-space-invalid.mlir
new file mode 100644
index 0000000000000..315da00a00d78
--- /dev/null
+++ b/mlir/test/Dialect/Arithmetic/one-shot-bufferize-memory-space-invalid.mlir
@@ -0,0 +1,22 @@
+// RUN: mlir-opt %s -one-shot-bufferize="must-infer-memory-space" -split-input-file -verify-diagnostics
+
+func.func @inconsistent_memory_space_arith_select(%c: i1) -> tensor<10xf32> {
+  // Selecting tensors with different memory spaces. Such IR cannot be
+  // bufferized.
+  %0 = bufferization.alloc_tensor() {memory_space = 0 : ui64} : tensor<10xf32>
+  %1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
+  // expected-error @+2 {{inconsistent memory space on true/false operands}}
+  // expected-error @+1 {{failed to bufferize op}}
+  %r = arith.select %c, %0, %1 : tensor<10xf32>
+  func.return %r : tensor<10xf32>
+}
+
+// -----
+
+func.func @constant_memory_space(%idx: index, %v: i32) -> tensor<3xi32> {
+  // expected-error @+2 {{memory space not implemented yet}}
+  // expected-error @+1 {{failed to bufferize op}}
+  %cst = arith.constant dense<[5, 1000, 20]> : tensor<3xi32>
+  %0 = tensor.insert %v into %cst[%idx] : tensor<3xi32>
+  return %0 : tensor<3xi32>
+}
\ No newline at end of file

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-memory-space-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-memory-space-invalid.mlir
index f9436543698a2..5feeab0bc89ca 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-memory-space-invalid.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-memory-space-invalid.mlir
@@ -6,3 +6,14 @@ func.func @alloc_tensor_without_memory_space() -> tensor<10xf32> {
   %0 = bufferization.alloc_tensor() : tensor<10xf32>
   return %0 : tensor<10xf32>
 }
+
+// -----
+
+func.func @memory_space_of_unknown_op() -> f32 {
+  %c0 = arith.constant 0 : index
+  // expected-error @+1 {{could not infer memory space}}
+  %t = "test.dummy_op"() : () -> (tensor<10xf32>)
+  // expected-error @+1 {{failed to bufferize op}}
+  %s = tensor.extract %t[%c0] : tensor<10xf32>
+  return %s : f32
+}

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space-invalid.mlir
new file mode 100644
index 0000000000000..ba71090ec1ba8
--- /dev/null
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space-invalid.mlir
@@ -0,0 +1,12 @@
+// RUN: mlir-opt %s -tensor-copy-insertion="must-infer-memory-space" -split-input-file -verify-diagnostics
+
+// An alloc is inserted but the copy is omitted. Therefore, the memory space
+// should be specified on the alloc_tensor op.
+func.func @memory_space_of_unknown_op() -> (tensor<10xf32>, tensor<10xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  // expected-error @+1 {{could not infer memory space}}
+  %t = bufferization.alloc_tensor() : tensor<10xf32>
+  %s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
+  return %s, %t : tensor<10xf32>, tensor<10xf32>
+}

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir
new file mode 100644
index 0000000000000..1397ccd1b0484
--- /dev/null
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir
@@ -0,0 +1,25 @@
+// RUN: mlir-opt %s -tensor-copy-insertion="must-infer-memory-space" -split-input-file | FileCheck %s
+
+// CHECK-LABEL: func @unknown_op_copy
+func.func @unknown_op_copy() -> (tensor<10xf32>, tensor<10xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  // CHECK: %[[dummy:.*]] = "test.dummy_op"() : () -> tensor<10xf32>
+  %t = "test.dummy_op"() : () -> tensor<10xf32>
+  // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[dummy]]) {bufferization.escape = [false]} : tensor<10xf32>
+  %s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
+  return %s, %t : tensor<10xf32>, tensor<10xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @alloc_tensor_copy
+func.func @alloc_tensor_copy() -> (tensor<10xf32>, tensor<10xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 1 : ui64} : tensor<10xf32>
+  %t = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
+  // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 1 : ui64} : tensor<10xf32>
+  %s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
+  return %s, %t : tensor<10xf32>, tensor<10xf32>
+}

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
index 5bc08c69305fc..b8646edc79e72 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
@@ -40,10 +40,10 @@ func.func @do_not_copy_undefined_tensor(%f: f32, %idx: index)
 {
   // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
   // The second alloc_tensor should not have a copy operand.
-  // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
+  // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<5xf32>
 
   // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32>
-  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32>
+  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true], memory_space = 0 : ui64} : tensor<5xf32>
   %0 = bufferization.alloc_tensor() : tensor<5xf32>
   %1 = tensor.insert %f into %0[%idx] : tensor<5xf32>
   return %0, %1 : tensor<5xf32>, tensor<5xf32>
@@ -55,7 +55,7 @@ func.func @do_not_copy_undefined_tensor(%f: f32, %idx: index)
 func.func @do_not_copy_when_overwritten(%t: tensor<5xf32>, %f: f32)
   -> (tensor<5xf32>, tensor<5xf32>)
 {
-  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
+  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<5xf32>
   // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<5xf32>)
   %r = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>],
@@ -74,7 +74,7 @@ func.func @do_not_copy_when_result_not_read(%t: tensor<5xf32>, %f: f32)
   -> (tensor<3xf32>)
 {
   %0 = tensor.extract_slice %t[0][3][1] : tensor<5xf32> to tensor<3xf32>
-  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<3xf32>
+  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<3xf32>
   // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<3xf32>)
   %r = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>],

diff  --git a/mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir
new file mode 100644
index 0000000000000..52338d0701be3
--- /dev/null
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir
@@ -0,0 +1,16 @@
+// RUN: mlir-opt %s -one-shot-bufferize -split-input-file -verify-diagnostics
+
+func.func @inconsistent_memory_space_scf_if(%c: i1) -> tensor<10xf32> {
+  // Yielding tensors with different memory spaces. Such IR cannot be
+  // bufferized.
+  %0 = bufferization.alloc_tensor() {memory_space = 0 : ui64} : tensor<10xf32>
+  %1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
+  // expected-error @+2 {{inconsistent memory space on then/else branches}}
+  // expected-error @+1 {{failed to bufferize op}}
+  %r = scf.if %c -> tensor<10xf32> {
+    scf.yield %0 : tensor<10xf32>
+  } else {
+    scf.yield %1 : tensor<10xf32>
+  }
+  func.return %r : tensor<10xf32>
+}

diff  --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
index 5ea23b8e65b47..e2293cc4c456d 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -632,3 +632,80 @@ func.func @matmul(%arg0: tensor<8x8xf32>, %arg1: tensor<8x8xf32>, %arg2: tensor<
   }
   return %0 : tensor<8x8xf32>
 }
+
+// -----
+
+// CHECK-LABEL: func @scf_if_memory_space
+func.func @scf_if_memory_space(%c: i1, %f: f32) -> (f32, f32)
+{
+  %c0 = arith.constant 0 : index
+  // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1>
+  %0 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<5xf32>
+  // CHECK: scf.if %{{.*}} -> (memref<5xf32, 1>) {
+  %1 = scf.if %c -> tensor<5xf32> {
+    // CHECK: %[[cloned:.*]] = bufferization.clone %[[alloc]]
+    // CHECK: scf.yield %[[cloned]]
+    scf.yield %0 : tensor<5xf32>
+  } else {
+    // CHECK: %[[alloc2:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1>
+    // CHECK: memref.store %{{.*}}, %[[alloc2]]
+    // CHECK: %[[cloned2:.*]] = bufferization.clone %[[alloc2]]
+    // CHECK: memref.dealloc %[[alloc2]]
+    // CHECK: scf.yield %[[cloned2]]
+    %2 = tensor.insert %f into %0[%c0] : tensor<5xf32>
+    scf.yield %2 : tensor<5xf32>
+  }
+  %r0 = tensor.extract %0[%c0] : tensor<5xf32>
+  %r1 = tensor.extract %1[%c0] : tensor<5xf32>
+  return %r0, %r1 : f32, f32
+}
+
+// -----
+
+// CHECK-LABEL: func @scf_execute_region_memory_space
+// CHECK: memref.alloc() {{.*}} : memref<5xf32, 1>
+// CHECK: memref.store
+// CHECK: memref.load
+// CHECK: memref.dealloc
+func.func @scf_execute_region_memory_space(%f: f32) -> f32 {
+  %c0 = arith.constant 0 : index
+  %0 = scf.execute_region -> tensor<5xf32> {
+    %1 = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<5xf32>
+    %2 = tensor.insert %f into %1[%c0] : tensor<5xf32>
+    scf.yield %2 : tensor<5xf32>
+  }
+  %r = tensor.extract %0[%c0] : tensor<5xf32>
+  return %r : f32
+}
+
+// -----
+
+// Additional allocs are inserted in the loop body. We just check that all
+// allocs have the correct memory space.
+
+// CHECK-LABEL: func @scf_for_swapping_yields_memory_space
+func.func @scf_for_swapping_yields_memory_space(
+    %sz: index, %C : tensor<4xf32>, %lb : index, %ub : index, %step : index)
+  -> (f32, f32)
+{
+  // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref<?xf32, 1>
+  // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref<?xf32, 1>
+  %A = bufferization.alloc_tensor(%sz) {memory_space = 1 : ui64} : tensor<?xf32>
+  %B = bufferization.alloc_tensor(%sz) {memory_space = 1 : ui64} : tensor<?xf32>
+
+  // CHECK: scf.for {{.*}} {
+  %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
+      -> (tensor<?xf32>, tensor<?xf32>)
+  {
+    // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref<?xf32, 1>
+    // CHECK: memref.alloc(%{{.*}}) {{.*}} : memref<?xf32, 1>
+    %ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
+    %ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
+    // Yield tensors in different order.
+    scf.yield %ttB, %ttA : tensor<?xf32>, tensor<?xf32>
+  }
+  // CHECK: }
+  %f0 = tensor.extract %r0#0[%step] : tensor<?xf32>
+  %f1 = tensor.extract %r0#1[%step] : tensor<?xf32>
+  return %f0, %f1: f32, f32
+}