[Mlir-commits] [mlir] 75e5f0a - [mlir] factor memref-to-llvm lowering out of std-to-llvm

Alex Zinenko llvmlistbot at llvm.org
Fri Jul 9 05:50:05 PDT 2021


Author: Alex Zinenko
Date: 2021-07-09T14:49:52+02:00
New Revision: 75e5f0aac97dca0507a391f3418b18f4f6e44cd7

URL: https://github.com/llvm/llvm-project/commit/75e5f0aac97dca0507a391f3418b18f4f6e44cd7
DIFF: https://github.com/llvm/llvm-project/commit/75e5f0aac97dca0507a391f3418b18f4f6e44cd7.diff

LOG: [mlir] factor memref-to-llvm lowering out of std-to-llvm

After the MemRef dialect was split out of the Standard dialect, the
conversion to the LLVM dialect remained a single monolithic pass.
This is undesirable for the same complexity-management reasons as
having a huge Standard dialect itself, and is even more confusing now
that the operations live in a separate dialect. Extract the conversion
of MemRef dialect operations to LLVM into a separate library and a
separate conversion pass.
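
For downstream users, the practical change is that the memref lowering
patterns are no longer pulled in by populateStdToLLVMConversionPatterns
and must be populated explicitly. As a rough sketch of what a combined
lowering pass body looks like after this patch, modeled on the toy
example changes below (the pass name is a placeholder):

    #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
    #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
    #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
    #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"

    void MyLoweringPass::runOnOperation() {
      // The conversion target and type converter live in LLVMCommon.
      LLVMConversionTarget target(getContext());
      LLVMTypeConverter typeConverter(&getContext());

      RewritePatternSet patterns(&getContext());
      // MemRef op lowerings now come from the new MemRefToLLVM library...
      populateMemRefToLLVMConversionPatterns(typeConverter, patterns);
      // ...while the remaining Standard op lowerings stay where they were.
      populateStdToLLVMConversionPatterns(typeConverter, patterns);

      if (failed(applyFullConversion(getOperation(), target,
                                     std::move(patterns))))
        signalPassFailure();
    }

Such code also needs to link against the new MLIRMemRefToLLVM library
(and MLIRLLVMCommonConversion), as the CMake changes below illustrate.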

Reviewed By: herhut, silvas

Differential Revision: https://reviews.llvm.org/D105625
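
The extracted pass is registered as convert-memref-to-llvm (see the
Passes.td hunk below), so the memref lowering can now be exercised in
isolation. A hypothetical mlir-opt invocation using the two options the
pass declares, in the usual pass-option syntax:

    mlir-opt input.mlir \
      --convert-memref-to-llvm="use-aligned-alloc=1 index-bitwidth=32" \
      --convert-std-to-llvm

Note that the use-aligned-alloc option moves from convert-std-to-llvm
to the new pass, so pipelines that previously passed it to
convert-std-to-llvm need to be updated.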

Added: 
    mlir/include/mlir/Conversion/LLVMCommon/ConversionTarget.h
    mlir/include/mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h
    mlir/include/mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h
    mlir/lib/Conversion/LLVMCommon/ConversionTarget.cpp
    mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp
    mlir/lib/Conversion/MemRefToLLVM/CMakeLists.txt
    mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
    mlir/test/Conversion/MemRefToLLVM/convert-alloca-scope.mlir
    mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
    mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir
    mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
    mlir/test/Conversion/StandardToLLVM/func-memref.mlir

Modified: 
    mlir/examples/toy/Ch6/CMakeLists.txt
    mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
    mlir/examples/toy/Ch7/CMakeLists.txt
    mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
    mlir/include/mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h
    mlir/include/mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h
    mlir/include/mlir/Conversion/OpenACCToLLVM/ConvertOpenACCToLLVM.h
    mlir/include/mlir/Conversion/Passes.h
    mlir/include/mlir/Conversion/Passes.td
    mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
    mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
    mlir/lib/Conversion/AsyncToLLVM/CMakeLists.txt
    mlir/lib/Conversion/CMakeLists.txt
    mlir/lib/Conversion/ComplexToLLVM/CMakeLists.txt
    mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp
    mlir/lib/Conversion/GPUCommon/CMakeLists.txt
    mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
    mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
    mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
    mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
    mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt
    mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
    mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
    mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt
    mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
    mlir/lib/Conversion/LLVMCommon/CMakeLists.txt
    mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
    mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
    mlir/lib/Conversion/OpenACCToLLVM/CMakeLists.txt
    mlir/lib/Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp
    mlir/lib/Conversion/OpenMPToLLVM/CMakeLists.txt
    mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
    mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt
    mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp
    mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
    mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.cpp
    mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
    mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
    mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt
    mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
    mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
    mlir/lib/Conversion/VectorToROCDL/CMakeLists.txt
    mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp
    mlir/lib/Dialect/AMX/Transforms/CMakeLists.txt
    mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp
    mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
    mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
    mlir/lib/Dialect/X86Vector/Transforms/CMakeLists.txt
    mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp
    mlir/test/Conversion/StandardToLLVM/calling-convention.mlir
    mlir/test/Conversion/StandardToLLVM/convert-argattrs.mlir
    mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
    mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
    mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
    mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
    mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
    mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
    mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
    mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
    mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir
    mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
    mlir/test/lib/Conversion/StandardToLLVM/CMakeLists.txt
    mlir/test/lib/Conversion/StandardToLLVM/TestConvertCallOp.cpp
    mlir/test/mlir-cpu-runner/async-value.mlir
    mlir/test/mlir-cpu-runner/async.mlir
    mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir
    mlir/test/mlir-cpu-runner/copy.mlir
    mlir/test/mlir-cpu-runner/global_memref.mlir
    mlir/test/mlir-cpu-runner/memref_reinterpret_cast.mlir
    mlir/test/mlir-cpu-runner/memref_reshape.mlir
    mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir
    mlir/test/mlir-cpu-runner/unranked_memref.mlir
    mlir/test/mlir-cpu-runner/utils.mlir
    mlir/test/python/execution_engine.py
    mlir/test/python/integration/dialects/linalg/opsrun.py
    mlir/tools/mlir-vulkan-runner/CMakeLists.txt
    mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
    mlir/unittests/ExecutionEngine/CMakeLists.txt
    mlir/unittests/ExecutionEngine/Invoke.cpp

Removed: 
    mlir/test/Conversion/StandardToLLVM/convert-alloca-scope.mlir
    mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir
    mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir
    mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir


################################################################################
diff --git a/mlir/examples/toy/Ch6/CMakeLists.txt b/mlir/examples/toy/Ch6/CMakeLists.txt
index 62579658e9af0..6f493958fc296 100644
--- a/mlir/examples/toy/Ch6/CMakeLists.txt
+++ b/mlir/examples/toy/Ch6/CMakeLists.txt
@@ -42,8 +42,10 @@ target_link_libraries(toyc-ch6
     MLIRCastInterfaces
     MLIRExecutionEngine
     MLIRIR
+    MLIRLLVMCommonConversion
     MLIRLLVMIR
     MLIRLLVMToLLVMIRTranslation
+    MLIRMemRef
     MLIRParser
     MLIRPass
     MLIRSideEffectInterfaces

diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
index 8612a5b9a02f4..888e4708b9c93 100644
--- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
+++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
@@ -25,6 +25,9 @@
 #include "toy/Passes.h"
 
 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
@@ -195,6 +198,7 @@ void ToyToLLVMLoweringPass::runOnOperation() {
   RewritePatternSet patterns(&getContext());
   populateAffineToStdConversionPatterns(patterns);
   populateLoopToStdConversionPatterns(patterns);
+  populateMemRefToLLVMConversionPatterns(typeConverter, patterns);
   populateStdToLLVMConversionPatterns(typeConverter, patterns);
 
   // The only remaining operation to lower from the `toy` dialect, is the

diff --git a/mlir/examples/toy/Ch7/CMakeLists.txt b/mlir/examples/toy/Ch7/CMakeLists.txt
index cda24c48a5aa6..6055038e39a93 100644
--- a/mlir/examples/toy/Ch7/CMakeLists.txt
+++ b/mlir/examples/toy/Ch7/CMakeLists.txt
@@ -42,7 +42,9 @@ target_link_libraries(toyc-ch7
     MLIRCastInterfaces
     MLIRExecutionEngine
     MLIRIR
+    MLIRLLVMCommonConversion
     MLIRLLVMToLLVMIRTranslation
+    MLIRMemRef
     MLIRParser
     MLIRPass
     MLIRSideEffectInterfaces

diff --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
index 8612a5b9a02f4..888e4708b9c93 100644
--- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
+++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
@@ -25,6 +25,9 @@
 #include "toy/Passes.h"
 
 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
@@ -195,6 +198,7 @@ void ToyToLLVMLoweringPass::runOnOperation() {
   RewritePatternSet patterns(&getContext());
   populateAffineToStdConversionPatterns(patterns);
   populateLoopToStdConversionPatterns(patterns);
+  populateMemRefToLLVMConversionPatterns(typeConverter, patterns);
   populateStdToLLVMConversionPatterns(typeConverter, patterns);
 
   // The only remaining operation to lower from the `toy` dialect, is the

diff --git a/mlir/include/mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h b/mlir/include/mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h
index babd0336b227d..b7f10ad07045a 100644
--- a/mlir/include/mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h
+++ b/mlir/include/mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h
@@ -9,13 +9,13 @@
 #define MLIR_CONVERSION_COMPLEXTOLLVM_COMPLEXTOLLVM_H_
 
 #include "mlir/Conversion/LLVMCommon/StructBuilder.h"
-#include "mlir/Transforms/DialectConversion.h"
 
 namespace mlir {
 class LLVMTypeConverter;
 class ModuleOp;
 template <typename T>
 class OperationPass;
+class RewritePatternSet;
 
 class ComplexStructBuilder : public StructBuilder {
 public:

diff --git a/mlir/include/mlir/Conversion/LLVMCommon/ConversionTarget.h b/mlir/include/mlir/Conversion/LLVMCommon/ConversionTarget.h
new file mode 100644
index 0000000000000..facd9f4e7e4e6
--- /dev/null
+++ b/mlir/include/mlir/Conversion/LLVMCommon/ConversionTarget.h
@@ -0,0 +1,23 @@
+//===- ConversionTarget.h - LLVM dialect conversion target ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_LLVMCOMMON_CONVERSIONTARGET_H
+#define MLIR_CONVERSION_LLVMCOMMON_CONVERSIONTARGET_H
+
+#include "mlir/Transforms/DialectConversion.h"
+
+namespace mlir {
+/// Derived class that automatically populates legalization information for
+/// different LLVM ops.
+class LLVMConversionTarget : public ConversionTarget {
+public:
+  explicit LLVMConversionTarget(MLIRContext &ctx);
+};
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_LLVMCOMMON_CONVERSIONTARGET_H

diff --git a/mlir/include/mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h b/mlir/include/mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h
index a1f56048cd7f4..9973b38e75e31 100644
--- a/mlir/include/mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h
+++ b/mlir/include/mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h
@@ -8,14 +8,15 @@
 #ifndef MLIR_CONVERSION_LINALGTOLLVM_LINALGTOLLVM_H_
 #define MLIR_CONVERSION_LINALGTOLLVM_LINALGTOLLVM_H_
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Transforms/DialectConversion.h"
+#include <memory>
 
 namespace mlir {
+class LLVMTypeConverter;
 class MLIRContext;
 class ModuleOp;
 template <typename T>
 class OperationPass;
+class RewritePatternSet;
 
 /// Populate the given list with patterns that convert from Linalg to LLVM.
 void populateLinalgToLLVMConversionPatterns(LLVMTypeConverter &converter,

diff --git a/mlir/include/mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h b/mlir/include/mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h
new file mode 100644
index 0000000000000..a612ecce5bc31
--- /dev/null
+++ b/mlir/include/mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h
@@ -0,0 +1,67 @@
+//===- AllocLikeConversion.h - Convert allocation ops to LLVM ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_MEMREFTOLLVM_ALLOCLIKECONVERSION_H
+#define MLIR_CONVERSION_MEMREFTOLLVM_ALLOCLIKECONVERSION_H
+
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
+
+namespace mlir {
+
+/// Lowering for AllocOp and AllocaOp.
+struct AllocLikeOpLLVMLowering : public ConvertToLLVMPattern {
+  using ConvertToLLVMPattern::createIndexConstant;
+  using ConvertToLLVMPattern::getIndexType;
+  using ConvertToLLVMPattern::getVoidPtrType;
+
+  explicit AllocLikeOpLLVMLowering(StringRef opName,
+                                   LLVMTypeConverter &converter)
+      : ConvertToLLVMPattern(opName, &converter.getContext(), converter) {}
+
+protected:
+  // Returns 'input' aligned up to 'alignment'. Computes
+  // bumped = input + alignment - 1
+  // aligned = bumped - bumped % alignment
+  static Value createAligned(ConversionPatternRewriter &rewriter, Location loc,
+                             Value input, Value alignment);
+
+  /// Allocates the underlying buffer. Returns the allocated pointer and the
+  /// aligned pointer.
+  virtual std::tuple<Value, Value>
+  allocateBuffer(ConversionPatternRewriter &rewriter, Location loc,
+                 Value sizeBytes, Operation *op) const = 0;
+
+private:
+  static MemRefType getMemRefResultType(Operation *op) {
+    return op->getResult(0).getType().cast<MemRefType>();
+  }
+
+  // An `alloc` is converted into a definition of a memref descriptor value and
+  // a call to `malloc` to allocate the underlying data buffer.  The memref
+  // descriptor is of the LLVM structure type where:
+  //   1. the first element is a pointer to the allocated (typed) data buffer,
+  //   2. the second element is a pointer to the (typed) payload, aligned to the
+  //      specified alignment,
+  //   3. the remaining elements serve to store all the sizes and strides of the
+  //      memref using LLVM-converted `index` type.
+  //
+  // Alignment is performed by allocating `alignment` more bytes than
+  // requested and shifting the aligned pointer relative to the allocated
+  // memory. Note: `alignment - <minimum malloc alignment>` would actually be
+  // sufficient. If alignment is unspecified, the two pointers are equal.
+
+  // An `alloca` is converted into a definition of a memref descriptor value and
+  // an llvm.alloca to allocate the underlying data buffer.
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override;
+};
+
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_MEMREFTOLLVM_ALLOCLIKECONVERSION_H

diff --git a/mlir/include/mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h b/mlir/include/mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h
new file mode 100644
index 0000000000000..732f0bd4bb279
--- /dev/null
+++ b/mlir/include/mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h
@@ -0,0 +1,27 @@
+//===- MemRefToLLVM.h - MemRef to LLVM dialect conversion -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_CONVERSION_MEMREFTOLLVM_MEMREFTOLLVM_H
+#define MLIR_CONVERSION_MEMREFTOLLVM_MEMREFTOLLVM_H
+
+#include <memory>
+
+namespace mlir {
+class Pass;
+class LLVMTypeConverter;
+class RewritePatternSet;
+
+/// Collect a set of patterns to convert memory-related operations from the
+/// MemRef dialect to the LLVM dialect.
+void populateMemRefToLLVMConversionPatterns(LLVMTypeConverter &converter,
+                                            RewritePatternSet &patterns);
+
+std::unique_ptr<Pass> createMemRefToLLVMPass();
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_MEMREFTOLLVM_MEMREFTOLLVM_H

diff --git a/mlir/include/mlir/Conversion/OpenACCToLLVM/ConvertOpenACCToLLVM.h b/mlir/include/mlir/Conversion/OpenACCToLLVM/ConvertOpenACCToLLVM.h
index ef55f45d3d7f4..e6b682e0151d1 100644
--- a/mlir/include/mlir/Conversion/OpenACCToLLVM/ConvertOpenACCToLLVM.h
+++ b/mlir/include/mlir/Conversion/OpenACCToLLVM/ConvertOpenACCToLLVM.h
@@ -8,7 +8,7 @@
 #ifndef MLIR_CONVERSION_OPENACCTOLLVM_CONVERTOPENACCTOLLVM_H
 #define MLIR_CONVERSION_OPENACCTOLLVM_CONVERTOPENACCTOLLVM_H
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/StructBuilder.h"
 #include <memory>
 
 namespace mlir {

diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h
index 0cac29f2b62f7..9b3b9701ce6c9 100644
--- a/mlir/include/mlir/Conversion/Passes.h
+++ b/mlir/include/mlir/Conversion/Passes.h
@@ -23,6 +23,7 @@
 #include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h"
 #include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h"
 #include "mlir/Conversion/MathToLibm/MathToLibm.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/OpenACCToLLVM/ConvertOpenACCToLLVM.h"
 #include "mlir/Conversion/OpenACCToSCF/ConvertOpenACCToSCF.h"
 #include "mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h"

diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index e436d0e3148f3..9fa437a895b12 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -255,6 +255,24 @@ def ConvertMathToLibm : Pass<"convert-math-to-libm", "ModuleOp"> {
   let dependentDialects = ["StandardOpsDialect", "vector::VectorDialect"];
 }
 
+//===----------------------------------------------------------------------===//
+// MemRefToLLVM
+//===----------------------------------------------------------------------===//
+
+def ConvertMemRefToLLVM : Pass<"convert-memref-to-llvm", "ModuleOp"> {
+  let summary = "Convert operations from the MemRef dialect to the LLVM "
+                "dialect";
+  let constructor = "mlir::createMemRefToLLVMPass()";
+  let dependentDialects = ["LLVM::LLVMDialect"];
+  let options = [
+    Option<"useAlignedAlloc", "use-aligned-alloc", "bool", /*default=*/"false",
+           "Use aligned_alloc in place of malloc for heap allocations">,
+    Option<"indexBitwidth", "index-bitwidth", "unsigned",
+           /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
+           "Bitwidth of the index type, 0 to use size of machine word">,
+  ];
+}
+
 //===----------------------------------------------------------------------===//
 // OpenACCToSCF
 //===----------------------------------------------------------------------===//
@@ -434,8 +452,6 @@ def ConvertStandardToLLVM : Pass<"convert-std-to-llvm", "ModuleOp"> {
   let constructor = "mlir::createLowerToLLVMPass()";
   let dependentDialects = ["LLVM::LLVMDialect"];
   let options = [
-    Option<"useAlignedAlloc", "use-aligned-alloc", "bool", /*default=*/"false",
-           "Use aligned_alloc in place of malloc for heap allocations">,
     Option<"useBarePtrCallConv", "use-bare-ptr-memref-call-conv", "bool",
            /*default=*/"false",
            "Replace FuncOp's MemRef arguments with bare pointers to the MemRef "

diff --git a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
index 604556f3e0a47..7a72e794a1955 100644
--- a/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
+++ b/mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h
@@ -15,24 +15,12 @@
 #ifndef MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVM_H
 #define MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVM_H
 
-#include "mlir/Conversion/LLVMCommon/Pattern.h"
-
 namespace mlir {
 
+class MLIRContext;
 class LLVMTypeConverter;
 class RewritePatternSet;
 
-/// Collect a set of patterns to convert memory-related operations from the
-/// Standard dialect to the LLVM dialect, excluding non-memory-related
-/// operations and FuncOp.
-void populateStdToLLVMMemoryConversionPatterns(LLVMTypeConverter &converter,
-                                               RewritePatternSet &patterns);
-
-/// Collect a set of patterns to convert from the Standard dialect to the LLVM
-/// dialect, excluding the memory-related operations.
-void populateStdToLLVMNonMemoryConversionPatterns(LLVMTypeConverter &converter,
-                                                  RewritePatternSet &patterns);
-
 /// Collect the default pattern to convert a FuncOp to the LLVM dialect. If
 /// `emitCWrappers` is set, the pattern will also produce functions
 /// that pass memref descriptors by pointer-to-structure in addition to the
@@ -47,62 +35,6 @@ void populateStdToLLVMFuncOpConversionPattern(LLVMTypeConverter &converter,
 void populateStdToLLVMConversionPatterns(LLVMTypeConverter &converter,
                                          RewritePatternSet &patterns);
 
-/// Lowering for AllocOp and AllocaOp.
-struct AllocLikeOpLLVMLowering : public ConvertToLLVMPattern {
-  using ConvertToLLVMPattern::createIndexConstant;
-  using ConvertToLLVMPattern::getIndexType;
-  using ConvertToLLVMPattern::getVoidPtrType;
-
-  explicit AllocLikeOpLLVMLowering(StringRef opName,
-                                   LLVMTypeConverter &converter)
-      : ConvertToLLVMPattern(opName, &converter.getContext(), converter) {}
-
-protected:
-  // Returns 'input' aligned up to 'alignment'. Computes
-  // bumped = input + alignment - 1
-  // aligned = bumped - bumped % alignment
-  static Value createAligned(ConversionPatternRewriter &rewriter, Location loc,
-                             Value input, Value alignment);
-
-  /// Allocates the underlying buffer. Returns the allocated pointer and the
-  /// aligned pointer.
-  virtual std::tuple<Value, Value>
-  allocateBuffer(ConversionPatternRewriter &rewriter, Location loc,
-                 Value sizeBytes, Operation *op) const = 0;
-
-private:
-  static MemRefType getMemRefResultType(Operation *op) {
-    return op->getResult(0).getType().cast<MemRefType>();
-  }
-
-  // An `alloc` is converted into a definition of a memref descriptor value and
-  // a call to `malloc` to allocate the underlying data buffer.  The memref
-  // descriptor is of the LLVM structure type where:
-  //   1. the first element is a pointer to the allocated (typed) data buffer,
-  //   2. the second element is a pointer to the (typed) payload, aligned to the
-  //      specified alignment,
-  //   3. the remaining elements serve to store all the sizes and strides of the
-  //      memref using LLVM-converted `index` type.
-  //
-  // Alignment is performed by allocating `alignment` more bytes than
-  // requested and shifting the aligned pointer relative to the allocated
-  // memory. Note: `alignment - <minimum malloc alignment>` would actually be
-  // sufficient. If alignment is unspecified, the two pointers are equal.
-
-  // An `alloca` is converted into a definition of a memref descriptor value and
-  // an llvm.alloca to allocate the underlying data buffer.
-  LogicalResult
-  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override;
-};
-
-/// Derived class that automatically populates legalization information for
-/// different LLVM ops.
-class LLVMConversionTarget : public ConversionTarget {
-public:
-  explicit LLVMConversionTarget(MLIRContext &ctx);
-};
-
 } // namespace mlir
 
 #endif // MLIR_CONVERSION_STANDARDTOLLVM_CONVERTSTANDARDTOLLVM_H

diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
index 7d921839cf287..b14b75c96ee7b 100644
--- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
+++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
@@ -9,6 +9,8 @@
 #include "mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h"
 
 #include "../PassDetail.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Dialect/Async/IR/Async.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"

diff --git a/mlir/lib/Conversion/AsyncToLLVM/CMakeLists.txt b/mlir/lib/Conversion/AsyncToLLVM/CMakeLists.txt
index 48c2e0155ae1c..eeb2bc82b5aca 100644
--- a/mlir/lib/Conversion/AsyncToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/AsyncToLLVM/CMakeLists.txt
@@ -12,6 +12,7 @@ add_mlir_conversion_library(MLIRAsyncToLLVM
 
   LINK_LIBS PUBLIC
   MLIRAsync
+  MLIRLLVMCommonConversion
   MLIRLLVMIR
   MLIRStandardOpsTransforms
   MLIRStandardToLLVM

diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt
index b2bbb1be60f00..18c4850fd5ac0 100644
--- a/mlir/lib/Conversion/CMakeLists.txt
+++ b/mlir/lib/Conversion/CMakeLists.txt
@@ -13,6 +13,7 @@ add_subdirectory(LinalgToSPIRV)
 add_subdirectory(LinalgToStandard)
 add_subdirectory(LLVMCommon)
 add_subdirectory(MathToLibm)
+add_subdirectory(MemRefToLLVM)
 add_subdirectory(OpenACCToLLVM)
 add_subdirectory(OpenACCToSCF)
 add_subdirectory(OpenMPToLLVM)

diff --git a/mlir/lib/Conversion/ComplexToLLVM/CMakeLists.txt b/mlir/lib/Conversion/ComplexToLLVM/CMakeLists.txt
index 7e9aae35007f2..8f6a0d7edbddc 100644
--- a/mlir/lib/Conversion/ComplexToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/ComplexToLLVM/CMakeLists.txt
@@ -15,6 +15,5 @@ add_mlir_conversion_library(MLIRComplexToLLVM
   MLIRLLVMCommonConversion
   MLIRLLVMIR
   MLIRStandardOpsTransforms
-  MLIRStandardToLLVM
   MLIRTransforms
   )

diff --git a/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp b/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp
index 6e81570bd9985..1a7c2b3959192 100644
--- a/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp
+++ b/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp
@@ -9,7 +9,8 @@
 #include "mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h"
 
 #include "../PassDetail.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 

diff --git a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
index 988071e6a00de..64125e495a159 100644
--- a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
@@ -34,6 +34,7 @@ add_mlir_conversion_library(MLIRGPUToGPURuntimeTransforms
   MLIRIR
   MLIRLLVMCommonConversion
   MLIRLLVMIR
+  MLIRMemRefToLLVM
   MLIRPass
   MLIRSupport
   MLIRStandardToLLVM

diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
index b3a5d4078b887..9d54001d10292 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
@@ -8,7 +8,7 @@
 #ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
 #define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 

diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index 557eabcad79e9..328414b6e2055 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -17,7 +17,9 @@
 
 #include "../PassDetail.h"
 #include "mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
@@ -316,6 +318,7 @@ void GpuToLLVMConversionPass::runOnOperation() {
   target.addIllegalDialect<gpu::GPUDialect>();
 
   populateVectorToLLVMConversionPatterns(converter, patterns);
+  populateMemRefToLLVMConversionPatterns(converter, patterns);
   populateStdToLLVMConversionPatterns(converter, patterns);
   populateAsyncStructuralTypeConversionsAndLegality(converter, patterns,
                                                     target);

diff --git a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
index 53a92e9c17eb2..1f8012272fb90 100644
--- a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h
@@ -8,7 +8,7 @@
 #ifndef MLIR_CONVERSION_GPUCOMMON_INDEXINTRINSICSOPLOWERING_H_
 #define MLIR_CONVERSION_GPUCOMMON_INDEXINTRINSICSOPLOWERING_H_
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "llvm/ADT/StringSwitch.h"

diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
index f98009d65e844..520a3fa274318 100644
--- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
@@ -8,7 +8,7 @@
 #ifndef MLIR_CONVERSION_GPUCOMMON_OPTOFUNCCALLLOWERING_H_
 #define MLIR_CONVERSION_GPUCOMMON_OPTOFUNCCALLLOWERING_H_
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"

diff --git a/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt b/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt
index 119c322464f3a..e85449cb16936 100644
--- a/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUToNVVM/CMakeLists.txt
@@ -16,6 +16,7 @@ add_mlir_conversion_library(MLIRGPUToNVVMTransforms
   MLIRLLVMCommonConversion
   MLIRLLVMIR
   MLIRMemRef
+  MLIRMemRefToLLVM
   MLIRNVVMIR
   MLIRPass
   MLIRStandardToLLVM

diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index f6979fe186c41..243ed6c334d3e 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -13,8 +13,10 @@
 
 #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
 
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
@@ -169,6 +171,7 @@ struct LowerGpuOpsToNVVMOpsPass
     (void)applyPatternsAndFoldGreedily(m, std::move(patterns));
 
     populateStdToLLVMConversionPatterns(converter, llvmPatterns);
+    populateMemRefToLLVMConversionPatterns(converter, llvmPatterns);
     populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
     populateGpuWMMAToNVVMConversionPatterns(converter, llvmPatterns);
     LLVMConversionTarget target(getContext());

diff --git a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
index d955673d4b898..3a86e2e96bb96 100644
--- a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"

diff --git a/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt b/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt
index 15430fe5375e7..c3d6c4e7ad324 100644
--- a/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUToROCDL/CMakeLists.txt
@@ -14,6 +14,7 @@ add_mlir_conversion_library(MLIRGPUToROCDLTransforms
   MLIRGPUToGPURuntimeTransforms
   MLIRLLVMCommonConversion
   MLIRLLVMIR
+  MLIRMemRefToLLVM
   MLIRROCDLIR
   MLIRPass
   MLIRStandardToLLVM

diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 497e7deeb4f02..001c45e653c04 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -13,8 +13,10 @@
 
 #include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
 
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 #include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"
@@ -73,6 +75,7 @@ struct LowerGpuOpsToROCDLOpsPass
     populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
     populateVectorToROCDLConversionPatterns(converter, llvmPatterns);
     populateStdToLLVMConversionPatterns(converter, llvmPatterns);
+    populateMemRefToLLVMConversionPatterns(converter, llvmPatterns);
     populateGpuToROCDLConversionPatterns(converter, llvmPatterns);
     LLVMConversionTarget target(getContext());
     configureGpuToROCDLConversionLegality(target);

diff --git a/mlir/lib/Conversion/LLVMCommon/CMakeLists.txt b/mlir/lib/Conversion/LLVMCommon/CMakeLists.txt
index 3657e56e61b17..eb0b72027a18f 100644
--- a/mlir/lib/Conversion/LLVMCommon/CMakeLists.txt
+++ b/mlir/lib/Conversion/LLVMCommon/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_mlir_conversion_library(MLIRLLVMCommonConversion
+  ConversionTarget.cpp
   LoweringOptions.cpp
   MemRefBuilder.cpp
   Pattern.cpp

diff --git a/mlir/lib/Conversion/LLVMCommon/ConversionTarget.cpp b/mlir/lib/Conversion/LLVMCommon/ConversionTarget.cpp
new file mode 100644
index 0000000000000..8e73baf0b6404
--- /dev/null
+++ b/mlir/lib/Conversion/LLVMCommon/ConversionTarget.cpp
@@ -0,0 +1,18 @@
+//===- ConversionTarget.cpp - Target for converting to the LLVM dialect ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+
+using namespace mlir;
+
+mlir::LLVMConversionTarget::LLVMConversionTarget(MLIRContext &ctx)
+    : ConversionTarget(ctx) {
+  this->addLegalDialect<LLVM::LLVMDialect>();
+  this->addIllegalOp<LLVM::DialectCastOp>();
+}

diff --git a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
index 05aee893a9cb5..d3e94d7ed858a 100644
--- a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt
@@ -16,9 +16,9 @@ add_mlir_conversion_library(MLIRLinalgToLLVM
   MLIRAffineToStandard
   MLIRIR
   MLIRLinalg
+  MLIRLLVMCommonConversion
   MLIRLLVMIR
   MLIRSCFToStandard
-  MLIRStandardToLLVM
   MLIRTransforms
   MLIRVectorToLLVM
   MLIRVectorToSCF

diff --git a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
index 66e9e332f93af..9357407f55216 100644
--- a/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
+++ b/mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp
@@ -10,9 +10,10 @@
 
 #include "../PassDetail.h"
 #include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"

diff --git a/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp
new file mode 100644
index 0000000000000..fa32f8f77f3b2
--- /dev/null
+++ b/mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp
@@ -0,0 +1,54 @@
+//===- AllocLikeConversion.cpp - LLVM conversion for alloc operations -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+
+using namespace mlir;
+
+Value AllocLikeOpLLVMLowering::createAligned(
+    ConversionPatternRewriter &rewriter, Location loc, Value input,
+    Value alignment) {
+  Value one = createIndexAttrConstant(rewriter, loc, alignment.getType(), 1);
+  Value bump = rewriter.create<LLVM::SubOp>(loc, alignment, one);
+  Value bumped = rewriter.create<LLVM::AddOp>(loc, input, bump);
+  Value mod = rewriter.create<LLVM::URemOp>(loc, bumped, alignment);
+  return rewriter.create<LLVM::SubOp>(loc, bumped, mod);
+}
+
+LogicalResult AllocLikeOpLLVMLowering::matchAndRewrite(
+    Operation *op, ArrayRef<Value> operands,
+    ConversionPatternRewriter &rewriter) const {
+  MemRefType memRefType = getMemRefResultType(op);
+  if (!isConvertibleAndHasIdentityMaps(memRefType))
+    return rewriter.notifyMatchFailure(op, "incompatible memref type");
+  auto loc = op->getLoc();
+
+  // Get actual sizes of the memref as values: static sizes are constant
+  // values and dynamic sizes are passed to 'alloc' as operands.  In case of
+  // zero-dimensional memref, assume a scalar (size 1).
+  SmallVector<Value, 4> sizes;
+  SmallVector<Value, 4> strides;
+  Value sizeBytes;
+  this->getMemRefDescriptorSizes(loc, memRefType, operands, rewriter, sizes,
+                                 strides, sizeBytes);
+
+  // Allocate the underlying buffer.
+  Value allocatedPtr;
+  Value alignedPtr;
+  std::tie(allocatedPtr, alignedPtr) =
+      this->allocateBuffer(rewriter, loc, sizeBytes, op);
+
+  // Create the MemRef descriptor.
+  auto memRefDescriptor = this->createMemRefDescriptor(
+      loc, memRefType, allocatedPtr, alignedPtr, sizes, strides, rewriter);
+
+  // Return the final value of the descriptor.
+  rewriter.replaceOp(op, {memRefDescriptor});
+  return success();
+}

diff --git a/mlir/lib/Conversion/MemRefToLLVM/CMakeLists.txt b/mlir/lib/Conversion/MemRefToLLVM/CMakeLists.txt
new file mode 100644
index 0000000000000..4f9508d8c445f
--- /dev/null
+++ b/mlir/lib/Conversion/MemRefToLLVM/CMakeLists.txt
@@ -0,0 +1,21 @@
+add_mlir_conversion_library(MLIRMemRefToLLVM
+  AllocLikeConversion.cpp
+  MemRefToLLVM.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/MemRefToLLVM
+
+  DEPENDS
+  MLIRConversionPassIncGen
+
+  LINK_COMPONENTS
+  Core
+
+  LINK_LIBS PUBLIC
+  MLIRAnalysis
+  MLIRDataLayoutInterfaces
+  MLIRLLVMCommonConversion
+  MLIRMemRef
+  MLIRLLVMIR
+  MLIRTransforms
+  )

diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
new file mode 100644
index 0000000000000..a92c4069fcade
--- /dev/null
+++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
@@ -0,0 +1,1399 @@
+//===- MemRefToLLVM.cpp - MemRef to LLVM dialect conversion ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "../PassDetail.h"
+#include "mlir/Analysis/DataLayoutAnalysis.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h"
+#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/IR/AffineMap.h"
+#include "mlir/IR/BlockAndValueMapping.h"
+
+using namespace mlir;
+
+namespace {
+
+struct AllocOpLowering : public AllocLikeOpLLVMLowering {
+  AllocOpLowering(LLVMTypeConverter &converter)
+      : AllocLikeOpLLVMLowering(memref::AllocOp::getOperationName(),
+                                converter) {}
+
+  std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
+                                          Location loc, Value sizeBytes,
+                                          Operation *op) const override {
+    // Heap allocations.
+    memref::AllocOp allocOp = cast<memref::AllocOp>(op);
+    MemRefType memRefType = allocOp.getType();
+
+    Value alignment;
+    if (auto alignmentAttr = allocOp.alignment()) {
+      alignment = createIndexConstant(rewriter, loc, *alignmentAttr);
+    } else if (!memRefType.getElementType().isSignlessIntOrIndexOrFloat()) {
+      // In the case where no alignment is specified, we may want to override
+      // `malloc's` behavior. `malloc` typically aligns at the size of the
+      // biggest scalar on a target HW. For non-scalars, use the natural
+      // alignment of the LLVM type given by the LLVM DataLayout.
+      alignment = getSizeInBytes(loc, memRefType.getElementType(), rewriter);
+    }
+
+    if (alignment) {
+      // Adjust the allocation size to consider alignment.
+      sizeBytes = rewriter.create<LLVM::AddOp>(loc, sizeBytes, alignment);
+    }
+
+    // Allocate the underlying buffer and store a pointer to it in the MemRef
+    // descriptor.
+    Type elementPtrType = this->getElementPtrType(memRefType);
+    auto allocFuncOp = LLVM::lookupOrCreateMallocFn(
+        allocOp->getParentOfType<ModuleOp>(), getIndexType());
+    auto results = createLLVMCall(rewriter, loc, allocFuncOp, {sizeBytes},
+                                  getVoidPtrType());
+    Value allocatedPtr =
+        rewriter.create<LLVM::BitcastOp>(loc, elementPtrType, results[0]);
+
+    Value alignedPtr = allocatedPtr;
+    if (alignment) {
+      // Compute the aligned type pointer.
+      Value allocatedInt =
+          rewriter.create<LLVM::PtrToIntOp>(loc, getIndexType(), allocatedPtr);
+      Value alignmentInt =
+          createAligned(rewriter, loc, allocatedInt, alignment);
+      alignedPtr =
+          rewriter.create<LLVM::IntToPtrOp>(loc, elementPtrType, alignmentInt);
+    }
+
+    return std::make_tuple(allocatedPtr, alignedPtr);
+  }
+};
+
+struct AlignedAllocOpLowering : public AllocLikeOpLLVMLowering {
+  AlignedAllocOpLowering(LLVMTypeConverter &converter)
+      : AllocLikeOpLLVMLowering(memref::AllocOp::getOperationName(),
+                                converter) {}
+
+  /// Returns the memref's element size in bytes using the data layout active at
+  /// `op`.
+  // TODO: there are other places where this is used. Expose publicly?
+  unsigned getMemRefEltSizeInBytes(MemRefType memRefType, Operation *op) const {
+    const DataLayout *layout = &defaultLayout;
+    if (const DataLayoutAnalysis *analysis =
+            getTypeConverter()->getDataLayoutAnalysis()) {
+      layout = &analysis->getAbove(op);
+    }
+    Type elementType = memRefType.getElementType();
+    if (auto memRefElementType = elementType.dyn_cast<MemRefType>())
+      return getTypeConverter()->getMemRefDescriptorSize(memRefElementType,
+                                                         *layout);
+    if (auto memRefElementType = elementType.dyn_cast<UnrankedMemRefType>())
+      return getTypeConverter()->getUnrankedMemRefDescriptorSize(
+          memRefElementType, *layout);
+    return layout->getTypeSize(elementType);
+  }
+
+  /// Returns true if the memref size in bytes is known to be a multiple of
+  /// factor assuming the data layout active at `op`.
+  bool isMemRefSizeMultipleOf(MemRefType type, uint64_t factor,
+                              Operation *op) const {
+    uint64_t sizeDivisor = getMemRefEltSizeInBytes(type, op);
+    for (unsigned i = 0, e = type.getRank(); i < e; i++) {
+      if (type.isDynamic(type.getDimSize(i)))
+        continue;
+      sizeDivisor = sizeDivisor * type.getDimSize(i);
+    }
+    return sizeDivisor % factor == 0;
+  }
+
+  /// Returns the alignment to be used for the allocation call itself.
+  /// aligned_alloc requires the alignment to be a power of two and the
+  /// allocation size to be a multiple of the alignment.
+  int64_t getAllocationAlignment(memref::AllocOp allocOp) const {
+    if (Optional<uint64_t> alignment = allocOp.alignment())
+      return *alignment;
+
+    // Whenever we don't have alignment set, we will use an alignment
+    // consistent with the element type; since the alignment has to be a power
+    // of two, we bump the element size to the next power of two if needed.
+    auto eltSizeBytes = getMemRefEltSizeInBytes(allocOp.getType(), allocOp);
+    return std::max(kMinAlignedAllocAlignment,
+                    llvm::PowerOf2Ceil(eltSizeBytes));
+  }
+
+  std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
+                                          Location loc, Value sizeBytes,
+                                          Operation *op) const override {
+    // Heap allocations.
+    memref::AllocOp allocOp = cast<memref::AllocOp>(op);
+    MemRefType memRefType = allocOp.getType();
+    int64_t alignment = getAllocationAlignment(allocOp);
+    Value allocAlignment = createIndexConstant(rewriter, loc, alignment);
+
+    // aligned_alloc requires size to be a multiple of alignment; we will pad
+    // the size to the next multiple if necessary.
+    if (!isMemRefSizeMultipleOf(memRefType, alignment, op))
+      sizeBytes = createAligned(rewriter, loc, sizeBytes, allocAlignment);
+
+    Type elementPtrType = this->getElementPtrType(memRefType);
+    auto allocFuncOp = LLVM::lookupOrCreateAlignedAllocFn(
+        allocOp->getParentOfType<ModuleOp>(), getIndexType());
+    auto results =
+        createLLVMCall(rewriter, loc, allocFuncOp, {allocAlignment, sizeBytes},
+                       getVoidPtrType());
+    Value allocatedPtr =
+        rewriter.create<LLVM::BitcastOp>(loc, elementPtrType, results[0]);
+
+    return std::make_tuple(allocatedPtr, allocatedPtr);
+  }
+
+  /// The minimum alignment to use with aligned_alloc (has to be a power of 2).
+  static constexpr uint64_t kMinAlignedAllocAlignment = 16UL;
+
+  /// Default layout to use in absence of the corresponding analysis.
+  DataLayout defaultLayout;
+};
+
+// Out of line definition, required till C++17.
+constexpr uint64_t AlignedAllocOpLowering::kMinAlignedAllocAlignment;
+
+struct AllocaOpLowering : public AllocLikeOpLLVMLowering {
+  AllocaOpLowering(LLVMTypeConverter &converter)
+      : AllocLikeOpLLVMLowering(memref::AllocaOp::getOperationName(),
+                                converter) {}
+
+  /// Allocates the underlying buffer with an llvm.alloca. For stack
+  /// allocations, the allocated pointer and the aligned pointer coincide,
+  /// since llvm.alloca takes the requested alignment directly.
+  std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
+                                          Location loc, Value sizeBytes,
+                                          Operation *op) const override {
+
+    // With alloca, a stack allocation, one gets a pointer to the element type
+    // right away.
+    auto allocaOp = cast<memref::AllocaOp>(op);
+    auto elementPtrType = this->getElementPtrType(allocaOp.getType());
+
+    auto allocatedElementPtr = rewriter.create<LLVM::AllocaOp>(
+        loc, elementPtrType, sizeBytes,
+        allocaOp.alignment() ? *allocaOp.alignment() : 0);
+
+    return std::make_tuple(allocatedElementPtr, allocatedElementPtr);
+  }
+};
+
+struct AllocaScopeOpLowering
+    : public ConvertOpToLLVMPattern<memref::AllocaScopeOp> {
+  using ConvertOpToLLVMPattern<memref::AllocaScopeOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::AllocaScopeOp allocaScopeOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    OpBuilder::InsertionGuard guard(rewriter);
+    Location loc = allocaScopeOp.getLoc();
+
+    // Split the current block before the AllocaScopeOp to create the inlining
+    // point.
+    auto *currentBlock = rewriter.getInsertionBlock();
+    auto *remainingOpsBlock =
+        rewriter.splitBlock(currentBlock, rewriter.getInsertionPoint());
+    Block *continueBlock;
+    if (allocaScopeOp.getNumResults() == 0) {
+      continueBlock = remainingOpsBlock;
+    } else {
+      continueBlock = rewriter.createBlock(remainingOpsBlock,
+                                           allocaScopeOp.getResultTypes());
+      rewriter.create<LLVM::BrOp>(loc, ValueRange(), remainingOpsBlock);
+    }
+
+    // Inline body region.
+    Block *beforeBody = &allocaScopeOp.bodyRegion().front();
+    Block *afterBody = &allocaScopeOp.bodyRegion().back();
+    rewriter.inlineRegionBefore(allocaScopeOp.bodyRegion(), continueBlock);
+
+    // Save stack and then branch into the body of the region.
+    rewriter.setInsertionPointToEnd(currentBlock);
+    auto stackSaveOp =
+        rewriter.create<LLVM::StackSaveOp>(loc, getVoidPtrType());
+    rewriter.create<LLVM::BrOp>(loc, ValueRange(), beforeBody);
+
+    // Replace the alloca_scope return with a branch that jumps out of the body.
+    // Stack restore before leaving the body region.
+    rewriter.setInsertionPointToEnd(afterBody);
+    auto returnOp =
+        cast<memref::AllocaScopeReturnOp>(afterBody->getTerminator());
+    auto branchOp = rewriter.replaceOpWithNewOp<LLVM::BrOp>(
+        returnOp, returnOp.results(), continueBlock);
+
+    // Insert stack restore before jumping out the body of the region.
+    rewriter.setInsertionPoint(branchOp);
+    rewriter.create<LLVM::StackRestoreOp>(loc, stackSaveOp);
+
+    // Replace the op with values return from the body region.
+    rewriter.replaceOp(allocaScopeOp, continueBlock->getArguments());
+
+    return success();
+  }
+};
+
+struct AssumeAlignmentOpLowering
+    : public ConvertOpToLLVMPattern<memref::AssumeAlignmentOp> {
+  using ConvertOpToLLVMPattern<
+      memref::AssumeAlignmentOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::AssumeAlignmentOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    memref::AssumeAlignmentOp::Adaptor transformed(operands);
+    Value memref = transformed.memref();
+    unsigned alignment = op.alignment();
+    auto loc = op.getLoc();
+
+    MemRefDescriptor memRefDescriptor(memref);
+    Value ptr = memRefDescriptor.alignedPtr(rewriter, memref.getLoc());
+
+    // Emit llvm.assume(memref.alignedPtr & (alignment - 1) == 0). Notice that
+    // the asserted memref.alignedPtr isn't used anywhere else, as the real
+    // users like load/store/views always re-extract memref.alignedPtr as they
+    // get lowered.
+    //
+    // This relies on LLVM's CSE optimization (potentially after SROA), since
+    // after CSE all memref.alignedPtr instances get de-duplicated into the same
+    // pointer SSA value.
+    auto intPtrType =
+        getIntPtrType(memRefDescriptor.getElementPtrType().getAddressSpace());
+    Value zero = createIndexAttrConstant(rewriter, loc, intPtrType, 0);
+    Value mask =
+        createIndexAttrConstant(rewriter, loc, intPtrType, alignment - 1);
+    Value ptrValue = rewriter.create<LLVM::PtrToIntOp>(loc, intPtrType, ptr);
+    rewriter.create<LLVM::AssumeOp>(
+        loc, rewriter.create<LLVM::ICmpOp>(
+                 loc, LLVM::ICmpPredicate::eq,
+                 rewriter.create<LLVM::AndOp>(loc, ptrValue, mask), zero));
+
+    rewriter.eraseOp(op);
+    return success();
+  }
+};
+
+// A `dealloc` is converted into a call to `free` on the underlying data buffer.
+// The memref descriptor being an SSA value, there is no need to clean it up
+// in any way.
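+// A rough sketch (illustrative, not verbatim output):
+//
+//   memref.dealloc %m : memref<?xf32>
+//
+// becomes approximately:
+//
+//   %alloc  = llvm.extractvalue %desc[0] : !llvm.struct<(...)>
+//   %casted = llvm.bitcast %alloc : !llvm.ptr<f32> to !llvm.ptr<i8>
+//   llvm.call @free(%casted) : (!llvm.ptr<i8>) -> ()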
+struct DeallocOpLowering : public ConvertOpToLLVMPattern<memref::DeallocOp> {
+  using ConvertOpToLLVMPattern<memref::DeallocOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::DeallocOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    assert(operands.size() == 1 && "dealloc takes one operand");
+    memref::DeallocOp::Adaptor transformed(operands);
+
+    // Insert the `free` declaration if it is not already present.
+    auto freeFunc = LLVM::lookupOrCreateFreeFn(op->getParentOfType<ModuleOp>());
+    MemRefDescriptor memref(transformed.memref());
+    Value casted = rewriter.create<LLVM::BitcastOp>(
+        op.getLoc(), getVoidPtrType(),
+        memref.allocatedPtr(rewriter, op.getLoc()));
+    rewriter.replaceOpWithNewOp<LLVM::CallOp>(
+        op, TypeRange(), rewriter.getSymbolRefAttr(freeFunc), casted);
+    return success();
+  }
+};
+
+// A `dim` is converted to a constant for static sizes and to an access to the
+// size stored in the memref descriptor for dynamic sizes.
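+// For example (illustrative), `memref.dim %m, %c1 : memref<4x?xf32>` becomes
+// roughly `llvm.extractvalue %desc[3, 1]` on the converted descriptor, while
+// a query of the static dimension 0 folds to the constant 4.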
+struct DimOpLowering : public ConvertOpToLLVMPattern<memref::DimOp> {
+  using ConvertOpToLLVMPattern<memref::DimOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::DimOp dimOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    Type operandType = dimOp.source().getType();
+    if (operandType.isa<UnrankedMemRefType>()) {
+      rewriter.replaceOp(dimOp, {extractSizeOfUnrankedMemRef(
+                                    operandType, dimOp, operands, rewriter)});
+
+      return success();
+    }
+    if (operandType.isa<MemRefType>()) {
+      rewriter.replaceOp(dimOp, {extractSizeOfRankedMemRef(
+                                    operandType, dimOp, operands, rewriter)});
+      return success();
+    }
+    llvm_unreachable("expected MemRefType or UnrankedMemRefType");
+  }
+
+private:
+  Value extractSizeOfUnrankedMemRef(Type operandType, memref::DimOp dimOp,
+                                    ArrayRef<Value> operands,
+                                    ConversionPatternRewriter &rewriter) const {
+    Location loc = dimOp.getLoc();
+    memref::DimOp::Adaptor transformed(operands);
+
+    auto unrankedMemRefType = operandType.cast<UnrankedMemRefType>();
+    auto scalarMemRefType =
+        MemRefType::get({}, unrankedMemRefType.getElementType());
+    unsigned addressSpace = unrankedMemRefType.getMemorySpaceAsInt();
+
+    // Extract pointer to the underlying ranked descriptor and bitcast it to a
+    // memref<element_type> descriptor pointer to minimize the number of GEP
+    // operations.
+    UnrankedMemRefDescriptor unrankedDesc(transformed.source());
+    Value underlyingRankedDesc = unrankedDesc.memRefDescPtr(rewriter, loc);
+    Value scalarMemRefDescPtr = rewriter.create<LLVM::BitcastOp>(
+        loc,
+        LLVM::LLVMPointerType::get(typeConverter->convertType(scalarMemRefType),
+                                   addressSpace),
+        underlyingRankedDesc);
+
+    // Get pointer to offset field of memref<element_type> descriptor.
+    Type indexPtrTy = LLVM::LLVMPointerType::get(
+        getTypeConverter()->getIndexType(), addressSpace);
+    Value two = rewriter.create<LLVM::ConstantOp>(
+        loc, typeConverter->convertType(rewriter.getI32Type()),
+        rewriter.getI32IntegerAttr(2));
+    Value offsetPtr = rewriter.create<LLVM::GEPOp>(
+        loc, indexPtrTy, scalarMemRefDescPtr,
+        ValueRange({createIndexConstant(rewriter, loc, 0), two}));
+
+    // The size value that we have to extract can be obtained using a GEPOp
+    // with `dimOp.index() + 1` as the index argument.
+    Value idxPlusOne = rewriter.create<LLVM::AddOp>(
+        loc, createIndexConstant(rewriter, loc, 1), transformed.index());
+    Value sizePtr = rewriter.create<LLVM::GEPOp>(loc, indexPtrTy, offsetPtr,
+                                                 ValueRange({idxPlusOne}));
+    return rewriter.create<LLVM::LoadOp>(loc, sizePtr);
+  }
+
+  Optional<int64_t> getConstantDimIndex(memref::DimOp dimOp) const {
+    if (Optional<int64_t> idx = dimOp.getConstantIndex())
+      return idx;
+
+    if (auto constantOp = dimOp.index().getDefiningOp<LLVM::ConstantOp>())
+      return constantOp.value().cast<IntegerAttr>().getValue().getSExtValue();
+
+    return llvm::None;
+  }
+
+  Value extractSizeOfRankedMemRef(Type operandType, memref::DimOp dimOp,
+                                  ArrayRef<Value> operands,
+                                  ConversionPatternRewriter &rewriter) const {
+    Location loc = dimOp.getLoc();
+    memref::DimOp::Adaptor transformed(operands);
+    // Take advantage of a constant index, if available.
+    MemRefType memRefType = operandType.cast<MemRefType>();
+    if (Optional<int64_t> index = getConstantDimIndex(dimOp)) {
+      int64_t i = index.getValue();
+      if (memRefType.isDynamicDim(i)) {
+        // Extract the dynamic size from the memref descriptor.
+        MemRefDescriptor descriptor(transformed.source());
+        return descriptor.size(rewriter, loc, i);
+      }
+      // Use constant for static size.
+      int64_t dimSize = memRefType.getDimSize(i);
+      return createIndexConstant(rewriter, loc, dimSize);
+    }
+    Value index = transformed.index();
+    int64_t rank = memRefType.getRank();
+    MemRefDescriptor memrefDescriptor(transformed.source());
+    return memrefDescriptor.size(rewriter, loc, index, rank);
+  }
+};
+
+/// Returns the LLVM type of the global variable given the memref type `type`.
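+/// For example (illustrative), `memref<4x8xf32>` maps to
+/// `!llvm.array<4 x array<8 x f32>>`, with the element type going through the
+/// type converter.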
+static Type convertGlobalMemrefTypeToLLVM(MemRefType type,
+                                          LLVMTypeConverter &typeConverter) {
+  // The LLVM type for a global memref will be a multi-dimensional array. For
+  // declarations or uninitialized global memrefs, we can potentially flatten
+  // this to a 1D array. However, for memref.global ops with an initial value,
+  // we do not intend to flatten the ElementsAttribute when going to the LLVM
+  // dialect, so the LLVM type needs to be a multi-dimensional array.
+  Type elementType = typeConverter.convertType(type.getElementType());
+  Type arrayTy = elementType;
+  // The shape has the outermost dim at index 0, so we need to walk it
+  // backwards.
+  for (int64_t dim : llvm::reverse(type.getShape()))
+    arrayTy = LLVM::LLVMArrayType::get(arrayTy, dim);
+  return arrayTy;
+}
+
+/// GlobalMemrefOp is lowered to an LLVM global variable.
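+/// A rough sketch (illustrative, not verbatim output):
+///
+///   memref.global "private" constant @g : memref<2xf32> = dense<[1.0, 2.0]>
+///
+/// becomes approximately:
+///
+///   llvm.mlir.global private constant @g(dense<[1.0, 2.0]> : tensor<2xf32>)
+///       : !llvm.array<2 x f32>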
+struct GlobalMemrefOpLowering
+    : public ConvertOpToLLVMPattern<memref::GlobalOp> {
+  using ConvertOpToLLVMPattern<memref::GlobalOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::GlobalOp global, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    MemRefType type = global.type().cast<MemRefType>();
+    if (!isConvertibleAndHasIdentityMaps(type))
+      return failure();
+
+    Type arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter());
+
+    LLVM::Linkage linkage =
+        global.isPublic() ? LLVM::Linkage::External : LLVM::Linkage::Private;
+
+    Attribute initialValue = nullptr;
+    if (!global.isExternal() && !global.isUninitialized()) {
+      auto elementsAttr = global.initial_value()->cast<ElementsAttr>();
+      initialValue = elementsAttr;
+
+      // For scalar memrefs, the global variable created is of the element type,
+      // so unpack the elements attribute to extract the value.
+      if (type.getRank() == 0)
+        initialValue = elementsAttr.getValue({});
+    }
+
+    rewriter.replaceOpWithNewOp<LLVM::GlobalOp>(
+        global, arrayTy, global.constant(), linkage, global.sym_name(),
+        initialValue, /*alignment=*/0, type.getMemorySpaceAsInt());
+    return success();
+  }
+};
+
+/// GetGlobalMemrefOp is lowered into a memref descriptor with the pointer to
+/// the first element stashed into it. This derives from
+/// `AllocLikeOpLLVMLowering` to reuse the memref descriptor construction.
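+/// A rough sketch for `memref.get_global @g : memref<2xf32>` (illustrative,
+/// not verbatim output):
+///
+///   %addr = llvm.mlir.addressof @g : !llvm.ptr<array<2 x f32>>
+///   %gep  = llvm.getelementptr %addr[%c0, %c0]
+///             : (!llvm.ptr<array<2 x f32>>, i64, i64) -> !llvm.ptr<f32>
+///
+/// with %gep used as the aligned pointer and the allocated pointer set to the
+/// known-bad marker described below.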
+struct GetGlobalMemrefOpLowering : public AllocLikeOpLLVMLowering {
+  GetGlobalMemrefOpLowering(LLVMTypeConverter &converter)
+      : AllocLikeOpLLVMLowering(memref::GetGlobalOp::getOperationName(),
+                                converter) {}
+
+  /// Buffer "allocation" for memref.get_global op is getting the address of
+  /// the global variable referenced.
+  std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
+                                          Location loc, Value sizeBytes,
+                                          Operation *op) const override {
+    auto getGlobalOp = cast<memref::GetGlobalOp>(op);
+    MemRefType type = getGlobalOp.result().getType().cast<MemRefType>();
+    unsigned memSpace = type.getMemorySpaceAsInt();
+
+    Type arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter());
+    auto addressOf = rewriter.create<LLVM::AddressOfOp>(
+        loc, LLVM::LLVMPointerType::get(arrayTy, memSpace), getGlobalOp.name());
+
+    // Get the address of the first element in the array by creating a GEP
+    // with the address of the GV as the base and (rank + 1) zero indices.
+    Type elementType = typeConverter->convertType(type.getElementType());
+    Type elementPtrType = LLVM::LLVMPointerType::get(elementType, memSpace);
+
+    SmallVector<Value, 4> operands = {addressOf};
+    operands.insert(operands.end(), type.getRank() + 1,
+                    createIndexConstant(rewriter, loc, 0));
+    auto gep = rewriter.create<LLVM::GEPOp>(loc, elementPtrType, operands);
+
+    // We do not expect the memref obtained using `memref.get_global` to ever
+    // be deallocated. Set the allocated pointer to a known-bad value to help
+    // debugging if that ever happens.
+    auto intPtrType = getIntPtrType(memSpace);
+    Value deadBeefConst =
+        createIndexAttrConstant(rewriter, op->getLoc(), intPtrType, 0xdeadbeef);
+    auto deadBeefPtr =
+        rewriter.create<LLVM::IntToPtrOp>(loc, elementPtrType, deadBeefConst);
+
+    // The allocated and aligned pointers are the same. We could potentially
+    // stash a nullptr for the allocated pointer since we do not expect any
+    // dealloc.
+    return std::make_tuple(deadBeefPtr, gep);
+  }
+};
+
+// Common base for load and store operations on MemRefs. Restricts the match
+// to supported MemRef types. Provides functionality to emit code accessing a
+// specific element of the underlying data buffer.
+template <typename Derived>
+struct LoadStoreOpLowering : public ConvertOpToLLVMPattern<Derived> {
+  using ConvertOpToLLVMPattern<Derived>::ConvertOpToLLVMPattern;
+  using ConvertOpToLLVMPattern<Derived>::isConvertibleAndHasIdentityMaps;
+  using Base = LoadStoreOpLowering<Derived>;
+
+  LogicalResult match(Derived op) const override {
+    MemRefType type = op.getMemRefType();
+    return isConvertibleAndHasIdentityMaps(type) ? success() : failure();
+  }
+};
+
+// Load operation is lowered to obtaining a pointer to the indexed element
+// and loading it.
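+// A rough sketch (illustrative, not verbatim output; the actual index
+// arithmetic comes from getStridedElementPtr):
+//
+//   %v = memref.load %m[%i] : memref<?xf32>
+//
+// becomes approximately:
+//
+//   %base = llvm.extractvalue %desc[1] : !llvm.struct<(...)>
+//   %p    = llvm.getelementptr %base[%i]
+//             : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+//   %v    = llvm.load %p : !llvm.ptr<f32>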
+struct LoadOpLowering : public LoadStoreOpLowering<memref::LoadOp> {
+  using Base::Base;
+
+  LogicalResult
+  matchAndRewrite(memref::LoadOp loadOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    memref::LoadOp::Adaptor transformed(operands);
+    auto type = loadOp.getMemRefType();
+
+    Value dataPtr =
+        getStridedElementPtr(loadOp.getLoc(), type, transformed.memref(),
+                             transformed.indices(), rewriter);
+    rewriter.replaceOpWithNewOp<LLVM::LoadOp>(loadOp, dataPtr);
+    return success();
+  }
+};
+
+// Store operation is lowered to obtaining a pointer to the indexed element,
+// and storing the given value to it.
+struct StoreOpLowering : public LoadStoreOpLowering<memref::StoreOp> {
+  using Base::Base;
+
+  LogicalResult
+  matchAndRewrite(memref::StoreOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto type = op.getMemRefType();
+    memref::StoreOp::Adaptor transformed(operands);
+
+    Value dataPtr =
+        getStridedElementPtr(op.getLoc(), type, transformed.memref(),
+                             transformed.indices(), rewriter);
+    rewriter.replaceOpWithNewOp<LLVM::StoreOp>(op, transformed.value(),
+                                               dataPtr);
+    return success();
+  }
+};
+
+// The prefetch operation is lowered similarly to the load operation, except
+// that the replacement uses the llvm.prefetch operation.
+struct PrefetchOpLowering : public LoadStoreOpLowering<memref::PrefetchOp> {
+  using Base::Base;
+
+  LogicalResult
+  matchAndRewrite(memref::PrefetchOp prefetchOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    memref::PrefetchOp::Adaptor transformed(operands);
+    auto type = prefetchOp.getMemRefType();
+    auto loc = prefetchOp.getLoc();
+
+    Value dataPtr = getStridedElementPtr(loc, type, transformed.memref(),
+                                         transformed.indices(), rewriter);
+
+    // Replace with llvm.prefetch.
+    auto llvmI32Type = typeConverter->convertType(rewriter.getIntegerType(32));
+    auto isWrite = rewriter.create<LLVM::ConstantOp>(
+        loc, llvmI32Type, rewriter.getI32IntegerAttr(prefetchOp.isWrite()));
+    auto localityHint = rewriter.create<LLVM::ConstantOp>(
+        loc, llvmI32Type,
+        rewriter.getI32IntegerAttr(prefetchOp.localityHint()));
+    auto isData = rewriter.create<LLVM::ConstantOp>(
+        loc, llvmI32Type, rewriter.getI32IntegerAttr(prefetchOp.isDataCache()));
+
+    rewriter.replaceOpWithNewOp<LLVM::Prefetch>(prefetchOp, dataPtr, isWrite,
+                                                localityHint, isData);
+    return success();
+  }
+};
+
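+// Lowers `memref.cast`. A ranked-to-ranked cast keeps the descriptor as-is;
+// a ranked-to-unranked cast such as
+//
+//   %u = memref.cast %m : memref<4xf32> to memref<*xf32>
+//
+// stores the ranked descriptor to a stack slot, bitcasts that slot's address
+// to i8*, and packs {rank, i8* ptr} into a new unranked descriptor (rough
+// sketch; see the rewrite below for the exact sequence).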
+struct MemRefCastOpLowering : public ConvertOpToLLVMPattern<memref::CastOp> {
+  using ConvertOpToLLVMPattern<memref::CastOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult match(memref::CastOp memRefCastOp) const override {
+    Type srcType = memRefCastOp.getOperand().getType();
+    Type dstType = memRefCastOp.getType();
+
+    // memref::CastOp reduces to a bitcast in the ranked MemRef case and can
+    // be used for type erasure. For now it must preserve the underlying
+    // element type and requires source and result types to have the same
+    // rank. Therefore, perform a sanity check that the underlying structs are
+    // the same. Once op semantics are relaxed we can revisit.
+    if (srcType.isa<MemRefType>() && dstType.isa<MemRefType>())
+      return success(typeConverter->convertType(srcType) ==
+                     typeConverter->convertType(dstType));
+
+    // At least one of the operands has an unranked type.
+    assert(srcType.isa<UnrankedMemRefType>() ||
+           dstType.isa<UnrankedMemRefType>());
+
+    // Unranked-to-unranked casts are disallowed.
+    return !(srcType.isa<UnrankedMemRefType>() &&
+             dstType.isa<UnrankedMemRefType>())
+               ? success()
+               : failure();
+  }
+
+  void rewrite(memref::CastOp memRefCastOp, ArrayRef<Value> operands,
+               ConversionPatternRewriter &rewriter) const override {
+    memref::CastOp::Adaptor transformed(operands);
+
+    auto srcType = memRefCastOp.getOperand().getType();
+    auto dstType = memRefCastOp.getType();
+    auto targetStructType = typeConverter->convertType(memRefCastOp.getType());
+    auto loc = memRefCastOp.getLoc();
+
+    // For the ranked-to-ranked case, just keep the original descriptor.
+    if (srcType.isa<MemRefType>() && dstType.isa<MemRefType>())
+      return rewriter.replaceOp(memRefCastOp, {transformed.source()});
+
+    if (srcType.isa<MemRefType>() && dstType.isa<UnrankedMemRefType>()) {
+      // Casting a ranked to an unranked memref type:
+      //   - set the rank in the destination from the memref type;
+      //   - allocate space on the stack and copy the src memref descriptor;
+      //   - set the ptr in the destination to the stack space.
+      auto srcMemRefType = srcType.cast<MemRefType>();
+      int64_t rank = srcMemRefType.getRank();
+      // ptr = AllocaOp sizeof(MemRefDescriptor)
+      auto ptr = getTypeConverter()->promoteOneMemRefDescriptor(
+          loc, transformed.source(), rewriter);
+      // voidptr = BitCastOp srcType* to void*
+      auto voidPtr =
+          rewriter.create<LLVM::BitcastOp>(loc, getVoidPtrType(), ptr)
+              .getResult();
+      // rank = ConstantOp srcRank
+      auto rankVal = rewriter.create<LLVM::ConstantOp>(
+          loc, typeConverter->convertType(rewriter.getIntegerType(64)),
+          rewriter.getI64IntegerAttr(rank));
+      // undef = UndefOp
+      UnrankedMemRefDescriptor memRefDesc =
+          UnrankedMemRefDescriptor::undef(rewriter, loc, targetStructType);
+      // d1 = InsertValueOp undef, rank, 0
+      memRefDesc.setRank(rewriter, loc, rankVal);
+      // d2 = InsertValueOp d1, voidptr, 1
+      memRefDesc.setMemRefDescPtr(rewriter, loc, voidPtr);
+      rewriter.replaceOp(memRefCastOp, (Value)memRefDesc);
+
+    } else if (srcType.isa<UnrankedMemRefType>() && dstType.isa<MemRefType>()) {
+      // Casting from an unranked type to a ranked one. The operation is
+      // assumed to perform a correct cast; if the destination type does not
+      // match the runtime type of the unranked operand, the behavior is
+      // undefined.
+      UnrankedMemRefDescriptor memRefDesc(transformed.source());
+      // ptr = ExtractValueOp src, 1
+      auto ptr = memRefDesc.memRefDescPtr(rewriter, loc);
+      // castPtr = BitCastOp i8* to structTy*
+      auto castPtr =
+          rewriter
+              .create<LLVM::BitcastOp>(
+                  loc, LLVM::LLVMPointerType::get(targetStructType), ptr)
+              .getResult();
+      // struct = LoadOp castPtr
+      auto loadOp = rewriter.create<LLVM::LoadOp>(loc, castPtr);
+      rewriter.replaceOp(memRefCastOp, loadOp.getResult());
+    } else {
+      llvm_unreachable("Unsupported unranked memref to unranked memref cast");
+    }
+  }
+};
+
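+// Lowers `memref.copy` to a call into a runtime copy helper operating on
+// unranked descriptors (the callee comes from lookupOrCreateMemRefCopyFn).
+// A rough sketch (illustrative, not verbatim output): both operands are
+// wrapped into unranked descriptors and promoted to stack slots, then
+//
+//   llvm.call @memrefCopy(%elemSizeInBytes, %srcDescPtr, %targetDescPtr)
+//
+// is emitted, with the exact symbol name chosen by lookupOrCreateMemRefCopyFn.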
+struct MemRefCopyOpLowering : public ConvertOpToLLVMPattern<memref::CopyOp> {
+  using ConvertOpToLLVMPattern<memref::CopyOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::CopyOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = op.getLoc();
+    memref::CopyOp::Adaptor adaptor(operands);
+    auto srcType = op.source().getType().cast<BaseMemRefType>();
+    auto targetType = op.target().getType().cast<BaseMemRefType>();
+
+    // First make sure we have an unranked memref descriptor representation.
+    auto makeUnranked = [&, this](Value ranked, BaseMemRefType type) {
+      auto rank = rewriter.create<LLVM::ConstantOp>(
+          loc, getIndexType(), rewriter.getIndexAttr(type.getRank()));
+      auto *typeConverter = getTypeConverter();
+      auto ptr =
+          typeConverter->promoteOneMemRefDescriptor(loc, ranked, rewriter);
+      auto voidPtr =
+          rewriter.create<LLVM::BitcastOp>(loc, getVoidPtrType(), ptr)
+              .getResult();
+      auto unrankedType =
+          UnrankedMemRefType::get(type.getElementType(), type.getMemorySpace());
+      return UnrankedMemRefDescriptor::pack(rewriter, loc, *typeConverter,
+                                            unrankedType,
+                                            ValueRange{rank, voidPtr});
+    };
+
+    Value unrankedSource = srcType.hasRank()
+                               ? makeUnranked(adaptor.source(), srcType)
+                               : adaptor.source();
+    Value unrankedTarget = targetType.hasRank()
+                               ? makeUnranked(adaptor.target(), targetType)
+                               : adaptor.target();
+
+    // Now promote the unranked descriptors to the stack.
+    auto one = rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
+                                                 rewriter.getIndexAttr(1));
+    auto promote = [&](Value desc) {
+      auto ptrType = LLVM::LLVMPointerType::get(desc.getType());
+      auto allocated =
+          rewriter.create<LLVM::AllocaOp>(loc, ptrType, ValueRange{one});
+      rewriter.create<LLVM::StoreOp>(loc, desc, allocated);
+      return allocated;
+    };
+
+    auto sourcePtr = promote(unrankedSource);
+    auto targetPtr = promote(unrankedTarget);
+
+    auto elemSize = rewriter.create<LLVM::ConstantOp>(
+        loc, getIndexType(),
+        rewriter.getIndexAttr(srcType.getElementTypeBitWidth() / 8));
+    auto copyFn = LLVM::lookupOrCreateMemRefCopyFn(
+        op->getParentOfType<ModuleOp>(), getIndexType(), sourcePtr.getType());
+    rewriter.create<LLVM::CallOp>(loc, copyFn,
+                                  ValueRange{elemSize, sourcePtr, targetPtr});
+    rewriter.eraseOp(op);
+
+    return success();
+  }
+};
+
+/// Extracts the allocated and aligned pointers and the offset from a ranked
+/// or unranked memref type. In the unranked case, the fields are extracted
+/// from the underlying ranked descriptor.
+static void extractPointersAndOffset(Location loc,
+                                     ConversionPatternRewriter &rewriter,
+                                     LLVMTypeConverter &typeConverter,
+                                     Value originalOperand,
+                                     Value convertedOperand,
+                                     Value *allocatedPtr, Value *alignedPtr,
+                                     Value *offset = nullptr) {
+  Type operandType = originalOperand.getType();
+  if (operandType.isa<MemRefType>()) {
+    MemRefDescriptor desc(convertedOperand);
+    *allocatedPtr = desc.allocatedPtr(rewriter, loc);
+    *alignedPtr = desc.alignedPtr(rewriter, loc);
+    if (offset != nullptr)
+      *offset = desc.offset(rewriter, loc);
+    return;
+  }
+
+  unsigned memorySpace =
+      operandType.cast<UnrankedMemRefType>().getMemorySpaceAsInt();
+  Type elementType = operandType.cast<UnrankedMemRefType>().getElementType();
+  Type llvmElementType = typeConverter.convertType(elementType);
+  Type elementPtrPtrType = LLVM::LLVMPointerType::get(
+      LLVM::LLVMPointerType::get(llvmElementType, memorySpace));
+
+  // Extract pointer to the underlying ranked memref descriptor and cast it to
+  // ElemType**.
+  UnrankedMemRefDescriptor unrankedDesc(convertedOperand);
+  Value underlyingDescPtr = unrankedDesc.memRefDescPtr(rewriter, loc);
+
+  *allocatedPtr = UnrankedMemRefDescriptor::allocatedPtr(
+      rewriter, loc, underlyingDescPtr, elementPtrPtrType);
+  *alignedPtr = UnrankedMemRefDescriptor::alignedPtr(
+      rewriter, loc, typeConverter, underlyingDescPtr, elementPtrPtrType);
+  if (offset != nullptr) {
+    *offset = UnrankedMemRefDescriptor::offset(
+        rewriter, loc, typeConverter, underlyingDescPtr, elementPtrPtrType);
+  }
+}
+
+struct MemRefReinterpretCastOpLowering
+    : public ConvertOpToLLVMPattern<memref::ReinterpretCastOp> {
+  using ConvertOpToLLVMPattern<
+      memref::ReinterpretCastOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::ReinterpretCastOp castOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    memref::ReinterpretCastOp::Adaptor adaptor(operands,
+                                               castOp->getAttrDictionary());
+    Type srcType = castOp.source().getType();
+
+    Value descriptor;
+    if (failed(convertSourceMemRefToDescriptor(rewriter, srcType, castOp,
+                                               adaptor, &descriptor)))
+      return failure();
+    rewriter.replaceOp(castOp, {descriptor});
+    return success();
+  }
+
+private:
+  LogicalResult convertSourceMemRefToDescriptor(
+      ConversionPatternRewriter &rewriter, Type srcType,
+      memref::ReinterpretCastOp castOp,
+      memref::ReinterpretCastOp::Adaptor adaptor, Value *descriptor) const {
+    MemRefType targetMemRefType =
+        castOp.getResult().getType().cast<MemRefType>();
+    auto llvmTargetDescriptorTy = typeConverter->convertType(targetMemRefType)
+                                      .dyn_cast_or_null<LLVM::LLVMStructType>();
+    if (!llvmTargetDescriptorTy)
+      return failure();
+
+    // Create descriptor.
+    Location loc = castOp.getLoc();
+    auto desc = MemRefDescriptor::undef(rewriter, loc, llvmTargetDescriptorTy);
+
+    // Set allocated and aligned pointers.
+    Value allocatedPtr, alignedPtr;
+    extractPointersAndOffset(loc, rewriter, *getTypeConverter(),
+                             castOp.source(), adaptor.source(), &allocatedPtr,
+                             &alignedPtr);
+    desc.setAllocatedPtr(rewriter, loc, allocatedPtr);
+    desc.setAlignedPtr(rewriter, loc, alignedPtr);
+
+    // Set offset.
+    if (castOp.isDynamicOffset(0))
+      desc.setOffset(rewriter, loc, adaptor.offsets()[0]);
+    else
+      desc.setConstantOffset(rewriter, loc, castOp.getStaticOffset(0));
+
+    // Set sizes and strides.
+    unsigned dynSizeId = 0;
+    unsigned dynStrideId = 0;
+    for (unsigned i = 0, e = targetMemRefType.getRank(); i < e; ++i) {
+      if (castOp.isDynamicSize(i))
+        desc.setSize(rewriter, loc, i, adaptor.sizes()[dynSizeId++]);
+      else
+        desc.setConstantSize(rewriter, loc, i, castOp.getStaticSize(i));
+
+      if (castOp.isDynamicStride(i))
+        desc.setStride(rewriter, loc, i, adaptor.strides()[dynStrideId++]);
+      else
+        desc.setConstantStride(rewriter, loc, i, castOp.getStaticStride(i));
+    }
+    *descriptor = desc;
+    return success();
+  }
+};
+
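+// Lowers `memref.reshape` with a dynamically-shaped `shape` operand by
+// filling an unranked descriptor in a small loop. A rough CFG sketch
+// (illustrative pseudo-IR; block names are made up):
+//
+//   ^cond(%i, %stride):
+//     %pred = llvm.icmp "sge" %i, %c0
+//     llvm.cond_br %pred, ^body, ^remainder
+//   ^body:
+//     sizes[%i] = load shape[%i]; strides[%i] = %stride
+//     llvm.br ^cond(%i - 1, %stride * sizes[%i])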
+struct MemRefReshapeOpLowering
+    : public ConvertOpToLLVMPattern<memref::ReshapeOp> {
+  using ConvertOpToLLVMPattern<memref::ReshapeOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::ReshapeOp reshapeOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto *op = reshapeOp.getOperation();
+    memref::ReshapeOp::Adaptor adaptor(operands, op->getAttrDictionary());
+    Type srcType = reshapeOp.source().getType();
+
+    Value descriptor;
+    if (failed(convertSourceMemRefToDescriptor(rewriter, srcType, reshapeOp,
+                                               adaptor, &descriptor)))
+      return failure();
+    rewriter.replaceOp(op, {descriptor});
+    return success();
+  }
+
+private:
+  LogicalResult
+  convertSourceMemRefToDescriptor(ConversionPatternRewriter &rewriter,
+                                  Type srcType, memref::ReshapeOp reshapeOp,
+                                  memref::ReshapeOp::Adaptor adaptor,
+                                  Value *descriptor) const {
+    // Conversion for statically-known shape args is performed via
+    // `memref.reinterpret_cast`.
+    auto shapeMemRefType = reshapeOp.shape().getType().cast<MemRefType>();
+    if (shapeMemRefType.hasStaticShape())
+      return failure();
+
+    // The shape is a rank-1 memref with unknown length.
+    Location loc = reshapeOp.getLoc();
+    MemRefDescriptor shapeDesc(adaptor.shape());
+    Value resultRank = shapeDesc.size(rewriter, loc, 0);
+
+    // Extract address space and element type.
+    auto targetType =
+        reshapeOp.getResult().getType().cast<UnrankedMemRefType>();
+    unsigned addressSpace = targetType.getMemorySpaceAsInt();
+    Type elementType = targetType.getElementType();
+
+    // Create the unranked memref descriptor that holds the ranked one. The
+    // inner descriptor is allocated on the stack.
+    auto targetDesc = UnrankedMemRefDescriptor::undef(
+        rewriter, loc, typeConverter->convertType(targetType));
+    targetDesc.setRank(rewriter, loc, resultRank);
+    SmallVector<Value, 4> sizes;
+    UnrankedMemRefDescriptor::computeSizes(rewriter, loc, *getTypeConverter(),
+                                           targetDesc, sizes);
+    Value underlyingDescPtr = rewriter.create<LLVM::AllocaOp>(
+        loc, getVoidPtrType(), sizes.front(), llvm::None);
+    targetDesc.setMemRefDescPtr(rewriter, loc, underlyingDescPtr);
+
+    // Extract pointers and offset from the source memref.
+    Value allocatedPtr, alignedPtr, offset;
+    extractPointersAndOffset(loc, rewriter, *getTypeConverter(),
+                             reshapeOp.source(), adaptor.source(),
+                             &allocatedPtr, &alignedPtr, &offset);
+
+    // Set pointers and offset.
+    Type llvmElementType = typeConverter->convertType(elementType);
+    auto elementPtrPtrType = LLVM::LLVMPointerType::get(
+        LLVM::LLVMPointerType::get(llvmElementType, addressSpace));
+    UnrankedMemRefDescriptor::setAllocatedPtr(rewriter, loc, underlyingDescPtr,
+                                              elementPtrPtrType, allocatedPtr);
+    UnrankedMemRefDescriptor::setAlignedPtr(rewriter, loc, *getTypeConverter(),
+                                            underlyingDescPtr,
+                                            elementPtrPtrType, alignedPtr);
+    UnrankedMemRefDescriptor::setOffset(rewriter, loc, *getTypeConverter(),
+                                        underlyingDescPtr, elementPtrPtrType,
+                                        offset);
+
+    // Use the offset pointer as the base for further addressing. Copy over
+    // the new shape and compute strides. For this, we create a loop from
+    // rank - 1 down to 0.
+    Value targetSizesBase = UnrankedMemRefDescriptor::sizeBasePtr(
+        rewriter, loc, *getTypeConverter(), underlyingDescPtr,
+        elementPtrPtrType);
+    Value targetStridesBase = UnrankedMemRefDescriptor::strideBasePtr(
+        rewriter, loc, *getTypeConverter(), targetSizesBase, resultRank);
+    Value shapeOperandPtr = shapeDesc.alignedPtr(rewriter, loc);
+    Value oneIndex = createIndexConstant(rewriter, loc, 1);
+    Value resultRankMinusOne =
+        rewriter.create<LLVM::SubOp>(loc, resultRank, oneIndex);
+
+    Block *initBlock = rewriter.getInsertionBlock();
+    Type indexType = getTypeConverter()->getIndexType();
+    Block::iterator remainingOpsIt = std::next(rewriter.getInsertionPoint());
+
+    Block *condBlock = rewriter.createBlock(initBlock->getParent(), {},
+                                            {indexType, indexType});
+
+    // Move the remaining initBlock ops to condBlock.
+    Block *remainingBlock = rewriter.splitBlock(initBlock, remainingOpsIt);
+    rewriter.mergeBlocks(remainingBlock, condBlock, ValueRange());
+
+    rewriter.setInsertionPointToEnd(initBlock);
+    rewriter.create<LLVM::BrOp>(loc, ValueRange({resultRankMinusOne, oneIndex}),
+                                condBlock);
+    rewriter.setInsertionPointToStart(condBlock);
+    Value indexArg = condBlock->getArgument(0);
+    Value strideArg = condBlock->getArgument(1);
+
+    Value zeroIndex = createIndexConstant(rewriter, loc, 0);
+    Value pred = rewriter.create<LLVM::ICmpOp>(
+        loc, IntegerType::get(rewriter.getContext(), 1),
+        LLVM::ICmpPredicate::sge, indexArg, zeroIndex);
+
+    Block *bodyBlock =
+        rewriter.splitBlock(condBlock, rewriter.getInsertionPoint());
+    rewriter.setInsertionPointToStart(bodyBlock);
+
+    // Copy size from shape to descriptor.
+    Type llvmIndexPtrType = LLVM::LLVMPointerType::get(indexType);
+    Value sizeLoadGep = rewriter.create<LLVM::GEPOp>(
+        loc, llvmIndexPtrType, shapeOperandPtr, ValueRange{indexArg});
+    Value size = rewriter.create<LLVM::LoadOp>(loc, sizeLoadGep);
+    UnrankedMemRefDescriptor::setSize(rewriter, loc, *getTypeConverter(),
+                                      targetSizesBase, indexArg, size);
+
+    // Write stride value and compute next one.
+    UnrankedMemRefDescriptor::setStride(rewriter, loc, *getTypeConverter(),
+                                        targetStridesBase, indexArg, strideArg);
+    Value nextStride = rewriter.create<LLVM::MulOp>(loc, strideArg, size);
+
+    // Decrement loop counter and branch back.
+    Value decrement = rewriter.create<LLVM::SubOp>(loc, indexArg, oneIndex);
+    rewriter.create<LLVM::BrOp>(loc, ValueRange({decrement, nextStride}),
+                                condBlock);
+
+    Block *remainder =
+        rewriter.splitBlock(bodyBlock, rewriter.getInsertionPoint());
+
+    // Hook up the cond exit to the remainder.
+    rewriter.setInsertionPointToEnd(condBlock);
+    rewriter.create<LLVM::CondBrOp>(loc, pred, bodyBlock, llvm::None, remainder,
+                                    llvm::None);
+
+    // Reset position to beginning of new remainder block.
+    rewriter.setInsertionPointToStart(remainder);
+
+    *descriptor = targetDesc;
+    return success();
+  }
+};
+
+/// Conversion pattern that transforms a subview op into:
+///   1. An `llvm.mlir.undef` operation to create a memref descriptor.
+///   2. Updates to the descriptor to introduce the data ptr, offset, size
+///      and stride.
+/// The subview op is replaced by the descriptor.
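+///
+/// For example (illustrative), for
+///
+///   %1 = memref.subview %0[%off][%sz][%st]
+///          : memref<?xf32> to memref<?xf32, #map>
+///
+/// the new descriptor shares the pointers of %0 and gets
+/// offset = src_offset + %off * src_stride0, size = %sz, and
+/// stride = %st * src_stride0 (a rough sketch of the arithmetic below).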
+struct SubViewOpLowering : public ConvertOpToLLVMPattern<memref::SubViewOp> {
+  using ConvertOpToLLVMPattern<memref::SubViewOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::SubViewOp subViewOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = subViewOp.getLoc();
+
+    auto sourceMemRefType = subViewOp.source().getType().cast<MemRefType>();
+    auto sourceElementTy =
+        typeConverter->convertType(sourceMemRefType.getElementType());
+
+    auto viewMemRefType = subViewOp.getType();
+    auto inferredType = memref::SubViewOp::inferResultType(
+                            subViewOp.getSourceType(),
+                            extractFromI64ArrayAttr(subViewOp.static_offsets()),
+                            extractFromI64ArrayAttr(subViewOp.static_sizes()),
+                            extractFromI64ArrayAttr(subViewOp.static_strides()))
+                            .cast<MemRefType>();
+    auto targetElementTy =
+        typeConverter->convertType(viewMemRefType.getElementType());
+    auto targetDescTy = typeConverter->convertType(viewMemRefType);
+    if (!sourceElementTy || !targetDescTy || !targetElementTy ||
+        !LLVM::isCompatibleType(sourceElementTy) ||
+        !LLVM::isCompatibleType(targetElementTy) ||
+        !LLVM::isCompatibleType(targetDescTy))
+      return failure();
+
+    // Extract the offset and strides from the type.
+    int64_t offset;
+    SmallVector<int64_t, 4> strides;
+    auto successStrides = getStridesAndOffset(inferredType, strides, offset);
+    if (failed(successStrides))
+      return failure();
+
+    // Create the descriptor.
+    if (!LLVM::isCompatibleType(operands.front().getType()))
+      return failure();
+    MemRefDescriptor sourceMemRef(operands.front());
+    auto targetMemRef = MemRefDescriptor::undef(rewriter, loc, targetDescTy);
+
+    // Copy the buffer pointer from the old descriptor to the new one.
+    Value extracted = sourceMemRef.allocatedPtr(rewriter, loc);
+    Value bitcastPtr = rewriter.create<LLVM::BitcastOp>(
+        loc,
+        LLVM::LLVMPointerType::get(targetElementTy,
+                                   viewMemRefType.getMemorySpaceAsInt()),
+        extracted);
+    targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr);
+
+    // Copy the aligned pointer from the old descriptor to the new one.
+    extracted = sourceMemRef.alignedPtr(rewriter, loc);
+    bitcastPtr = rewriter.create<LLVM::BitcastOp>(
+        loc,
+        LLVM::LLVMPointerType::get(targetElementTy,
+                                   viewMemRefType.getMemorySpaceAsInt()),
+        extracted);
+    targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr);
+
+    auto shape = viewMemRefType.getShape();
+    auto inferredShape = inferredType.getShape();
+    size_t inferredShapeRank = inferredShape.size();
+    size_t resultShapeRank = shape.size();
+    llvm::SmallDenseSet<unsigned> unusedDims =
+        computeRankReductionMask(inferredShape, shape).getValue();
+
+    // Extract strides needed to compute offset.
+    SmallVector<Value, 4> strideValues;
+    strideValues.reserve(inferredShapeRank);
+    for (unsigned i = 0; i < inferredShapeRank; ++i)
+      strideValues.push_back(sourceMemRef.stride(rewriter, loc, i));
+
+    // Offset.
+    auto llvmIndexType = typeConverter->convertType(rewriter.getIndexType());
+    if (!ShapedType::isDynamicStrideOrOffset(offset)) {
+      targetMemRef.setConstantOffset(rewriter, loc, offset);
+    } else {
+      Value baseOffset = sourceMemRef.offset(rewriter, loc);
+      // `inferredShapeRank` may be larger than the number of offset operands
+      // because of trailing semantics. In this case, the offset is guaranteed
+      // to be interpreted as 0 and we can just skip the extra dimensions.
+      for (unsigned i = 0, e = std::min(inferredShapeRank,
+                                        subViewOp.getMixedOffsets().size());
+           i < e; ++i) {
+        Value offset =
+            // TODO: need OpFoldResult ODS adaptor to clean this up.
+            subViewOp.isDynamicOffset(i)
+                ? operands[subViewOp.getIndexOfDynamicOffset(i)]
+                : rewriter.create<LLVM::ConstantOp>(
+                      loc, llvmIndexType,
+                      rewriter.getI64IntegerAttr(subViewOp.getStaticOffset(i)));
+        Value mul = rewriter.create<LLVM::MulOp>(loc, offset, strideValues[i]);
+        baseOffset = rewriter.create<LLVM::AddOp>(loc, baseOffset, mul);
+      }
+      targetMemRef.setOffset(rewriter, loc, baseOffset);
+    }
+
+    // Update sizes and strides.
+    SmallVector<OpFoldResult> mixedSizes = subViewOp.getMixedSizes();
+    SmallVector<OpFoldResult> mixedStrides = subViewOp.getMixedStrides();
+    assert(mixedSizes.size() == mixedStrides.size() &&
+           "expected sizes and strides of equal length");
+    for (int i = inferredShapeRank - 1, j = resultShapeRank - 1;
+         i >= 0 && j >= 0; --i) {
+      if (unusedDims.contains(i))
+        continue;
+
+      // `i` may exceed the number of entries in subViewOp.getMixedSizes
+      // because of trailing semantics. In this case, the size is guaranteed
+      // to be the source dimension size (a `dim`) and the stride to be 1.
+      Value size, stride;
+      if (static_cast<unsigned>(i) >= mixedSizes.size()) {
+        // If the static size is available, use it directly. This is similar to
+        // the folding of dim(constant-op) but removes the need for dim to be
+        // aware of LLVM constants and for this pass to be aware of std
+        // constants.
+        int64_t staticSize =
+            subViewOp.source().getType().cast<MemRefType>().getShape()[i];
+        if (staticSize != ShapedType::kDynamicSize) {
+          size = rewriter.create<LLVM::ConstantOp>(
+              loc, llvmIndexType, rewriter.getI64IntegerAttr(staticSize));
+        } else {
+          Value pos = rewriter.create<LLVM::ConstantOp>(
+              loc, llvmIndexType, rewriter.getI64IntegerAttr(i));
+          size = rewriter.create<LLVM::DialectCastOp>(
+              loc, llvmIndexType,
+              rewriter.create<memref::DimOp>(loc, subViewOp.source(), pos));
+        }
+        stride = rewriter.create<LLVM::ConstantOp>(
+            loc, llvmIndexType, rewriter.getI64IntegerAttr(1));
+      } else {
+        // TODO: need OpFoldResult ODS adaptor to clean this up.
+        size =
+            subViewOp.isDynamicSize(i)
+                ? operands[subViewOp.getIndexOfDynamicSize(i)]
+                : rewriter.create<LLVM::ConstantOp>(
+                      loc, llvmIndexType,
+                      rewriter.getI64IntegerAttr(subViewOp.getStaticSize(i)));
+        if (!ShapedType::isDynamicStrideOrOffset(strides[i])) {
+          stride = rewriter.create<LLVM::ConstantOp>(
+              loc, llvmIndexType, rewriter.getI64IntegerAttr(strides[i]));
+        } else {
+          stride = subViewOp.isDynamicStride(i)
+                       ? operands[subViewOp.getIndexOfDynamicStride(i)]
+                       : rewriter.create<LLVM::ConstantOp>(
+                             loc, llvmIndexType,
+                             rewriter.getI64IntegerAttr(
+                                 subViewOp.getStaticStride(i)));
+          stride = rewriter.create<LLVM::MulOp>(loc, stride, strideValues[i]);
+        }
+      }
+      targetMemRef.setSize(rewriter, loc, j, size);
+      targetMemRef.setStride(rewriter, loc, j, stride);
+      j--;
+    }
+
+    rewriter.replaceOp(subViewOp, {targetMemRef});
+    return success();
+  }
+};
+
+/// Conversion pattern that transforms a transpose op into:
+///   1. An `llvm.mlir.undef` operation to create a memref descriptor.
+///   2. Updates to the descriptor to introduce the data ptr, offset, and
+///      permuted sizes and strides.
+/// The transpose op is replaced by the descriptor.
+class TransposeOpLowering : public ConvertOpToLLVMPattern<memref::TransposeOp> {
+public:
+  using ConvertOpToLLVMPattern<memref::TransposeOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::TransposeOp transposeOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = transposeOp.getLoc();
+    memref::TransposeOpAdaptor adaptor(operands);
+    MemRefDescriptor viewMemRef(adaptor.in());
+
+    // No permutation, early exit.
+    if (transposeOp.permutation().isIdentity())
+      return rewriter.replaceOp(transposeOp, {viewMemRef}), success();
+
+    auto targetMemRef = MemRefDescriptor::undef(
+        rewriter, loc, typeConverter->convertType(transposeOp.getShapedType()));
+
+    // Copy the base and aligned pointers from the old descriptor to the new
+    // one.
+    targetMemRef.setAllocatedPtr(rewriter, loc,
+                                 viewMemRef.allocatedPtr(rewriter, loc));
+    targetMemRef.setAlignedPtr(rewriter, loc,
+                               viewMemRef.alignedPtr(rewriter, loc));
+
+    // Copy the offset pointer from the old descriptor to the new one.
+    targetMemRef.setOffset(rewriter, loc, viewMemRef.offset(rewriter, loc));
+
+    // Iterate over the dimensions and apply size/stride permutation.
+    for (auto en : llvm::enumerate(transposeOp.permutation().getResults())) {
+      int sourcePos = en.index();
+      int targetPos = en.value().cast<AffineDimExpr>().getPosition();
+      targetMemRef.setSize(rewriter, loc, targetPos,
+                           viewMemRef.size(rewriter, loc, sourcePos));
+      targetMemRef.setStride(rewriter, loc, targetPos,
+                             viewMemRef.stride(rewriter, loc, sourcePos));
+    }
+
+    rewriter.replaceOp(transposeOp, {targetMemRef});
+    return success();
+  }
+};
+
+/// Conversion pattern that transforms a view op into:
+///   1. An `llvm.mlir.undef` operation to create a memref descriptor.
+///   2. Updates to the descriptor to introduce the data ptr, offset, size
+///      and stride.
+/// The view op is replaced by the descriptor.
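+///
+/// For example (illustrative),
+///
+///   %1 = memref.view %0[%shift][%sz] : memref<2048xi8> to memref<?x128xf32>
+///
+/// advances the aligned pointer of %0 by %shift bytes via GEP, bitcasts it to
+/// f32*, and fills in sizes and strides, computing strides as a running
+/// product of the sizes with the innermost stride being 1 (rough sketch).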
+struct ViewOpLowering : public ConvertOpToLLVMPattern<memref::ViewOp> {
+  using ConvertOpToLLVMPattern<memref::ViewOp>::ConvertOpToLLVMPattern;
+
+  // Build and return the value for the idx^th shape dimension, either by
+  // returning the constant shape dimension or by picking the proper dynamic
+  // size operand.
+  Value getSize(ConversionPatternRewriter &rewriter, Location loc,
+                ArrayRef<int64_t> shape, ValueRange dynamicSizes,
+                unsigned idx) const {
+    assert(idx < shape.size());
+    if (!ShapedType::isDynamic(shape[idx]))
+      return createIndexConstant(rewriter, loc, shape[idx]);
+    // Count the number of dynamic dims preceding idx, i.e. in [0, idx).
+    unsigned nDynamic = llvm::count_if(shape.take_front(idx), [](int64_t v) {
+      return ShapedType::isDynamic(v);
+    });
+    return dynamicSizes[nDynamic];
+  }
+
+  // Build and return the idx^th stride, either by returning the constant stride
+  // or by computing the dynamic stride from the current `runningStride` and
+  // `nextSize`. The caller should keep a running stride and update it with the
+  // result returned by this function.
+  Value getStride(ConversionPatternRewriter &rewriter, Location loc,
+                  ArrayRef<int64_t> strides, Value nextSize,
+                  Value runningStride, unsigned idx) const {
+    assert(idx < strides.size());
+    if (!MemRefType::isDynamicStrideOrOffset(strides[idx]))
+      return createIndexConstant(rewriter, loc, strides[idx]);
+    if (nextSize)
+      return runningStride
+                 ? rewriter.create<LLVM::MulOp>(loc, runningStride, nextSize)
+                 : nextSize;
+    assert(!runningStride);
+    return createIndexConstant(rewriter, loc, 1);
+  }
+
+  LogicalResult
+  matchAndRewrite(memref::ViewOp viewOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = viewOp.getLoc();
+    memref::ViewOpAdaptor adaptor(operands);
+
+    auto viewMemRefType = viewOp.getType();
+    auto targetElementTy =
+        typeConverter->convertType(viewMemRefType.getElementType());
+    auto targetDescTy = typeConverter->convertType(viewMemRefType);
+    if (!targetDescTy || !targetElementTy ||
+        !LLVM::isCompatibleType(targetElementTy) ||
+        !LLVM::isCompatibleType(targetDescTy))
+      return viewOp.emitWarning("Target descriptor type not converted to LLVM"),
+             failure();
+
+    int64_t offset;
+    SmallVector<int64_t, 4> strides;
+    auto successStrides = getStridesAndOffset(viewMemRefType, strides, offset);
+    if (failed(successStrides))
+      return viewOp.emitWarning("cannot cast to non-strided shape"), failure();
+    assert(offset == 0 && "expected offset to be 0");
+
+    // Create the descriptor.
+    MemRefDescriptor sourceMemRef(adaptor.source());
+    auto targetMemRef = MemRefDescriptor::undef(rewriter, loc, targetDescTy);
+
+    // Field 1: Copy the allocated pointer, used for malloc/free.
+    Value allocatedPtr = sourceMemRef.allocatedPtr(rewriter, loc);
+    auto srcMemRefType = viewOp.source().getType().cast<MemRefType>();
+    Value bitcastPtr = rewriter.create<LLVM::BitcastOp>(
+        loc,
+        LLVM::LLVMPointerType::get(targetElementTy,
+                                   srcMemRefType.getMemorySpaceAsInt()),
+        allocatedPtr);
+    targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr);
+
+    // Field 2: Copy the actual aligned pointer to payload.
+    Value alignedPtr = sourceMemRef.alignedPtr(rewriter, loc);
+    alignedPtr = rewriter.create<LLVM::GEPOp>(loc, alignedPtr.getType(),
+                                              alignedPtr, adaptor.byte_shift());
+    bitcastPtr = rewriter.create<LLVM::BitcastOp>(
+        loc,
+        LLVM::LLVMPointerType::get(targetElementTy,
+                                   srcMemRefType.getMemorySpaceAsInt()),
+        alignedPtr);
+    targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr);
+
+    // Field 3: The offset in the resulting type must be 0. This is because of
+    // the type change: an offset on srcType* may not be expressible as an
+    // offset on dstType*.
+    targetMemRef.setOffset(rewriter, loc,
+                           createIndexConstant(rewriter, loc, offset));
+
+    // Early exit for 0-D corner case.
+    if (viewMemRefType.getRank() == 0)
+      return rewriter.replaceOp(viewOp, {targetMemRef}), success();
+
+    // Fields 4 and 5: Update sizes and strides.
+    if (strides.back() != 1)
+      return viewOp.emitWarning("cannot cast to non-contiguous shape"),
+             failure();
+    Value stride = nullptr, nextSize = nullptr;
+    for (int i = viewMemRefType.getRank() - 1; i >= 0; --i) {
+      // Update size.
+      Value size =
+          getSize(rewriter, loc, viewMemRefType.getShape(), adaptor.sizes(), i);
+      targetMemRef.setSize(rewriter, loc, i, size);
+      // Update stride.
+      stride = getStride(rewriter, loc, strides, nextSize, stride, i);
+      targetMemRef.setStride(rewriter, loc, i, stride);
+      nextSize = size;
+    }
+
+    rewriter.replaceOp(viewOp, {targetMemRef});
+    return success();
+  }
+};
+
+} // namespace
+
+void mlir::populateMemRefToLLVMConversionPatterns(LLVMTypeConverter &converter,
+                                                  RewritePatternSet &patterns) {
+  // clang-format off
+  patterns.add<
+      AllocaOpLowering,
+      AllocaScopeOpLowering,
+      AssumeAlignmentOpLowering,
+      DimOpLowering,
+      DeallocOpLowering,
+      GlobalMemrefOpLowering,
+      GetGlobalMemrefOpLowering,
+      LoadOpLowering,
+      MemRefCastOpLowering,
+      MemRefCopyOpLowering,
+      MemRefReinterpretCastOpLowering,
+      MemRefReshapeOpLowering,
+      PrefetchOpLowering,
+      StoreOpLowering,
+      SubViewOpLowering,
+      TransposeOpLowering,
+      ViewOpLowering>(converter);
+  // clang-format on
+  auto allocLowering = converter.getOptions().allocLowering;
+  if (allocLowering == LowerToLLVMOptions::AllocLowering::AlignedAlloc)
+    patterns.add<AlignedAllocOpLowering>(converter);
+  else if (allocLowering == LowerToLLVMOptions::AllocLowering::Malloc)
+    patterns.add<AllocOpLowering>(converter);
+}
+
+namespace {
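+/// Pass wiring the MemRef-to-LLVM patterns into a partial conversion, e.g.
+/// (a sketch; the exact flag is defined in Passes.td):
+///
+///   mlir-opt --convert-memref-to-llvm input.mlir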
+struct MemRefToLLVMPass : public ConvertMemRefToLLVMBase<MemRefToLLVMPass> {
+  MemRefToLLVMPass() = default;
+
+  void runOnOperation() override {
+    Operation *op = getOperation();
+    const auto &dataLayoutAnalysis = getAnalysis<DataLayoutAnalysis>();
+    LowerToLLVMOptions options(&getContext(),
+                               dataLayoutAnalysis.getAtOrAbove(op));
+    options.allocLowering =
+        (useAlignedAlloc ? LowerToLLVMOptions::AllocLowering::AlignedAlloc
+                         : LowerToLLVMOptions::AllocLowering::Malloc);
+    if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
+      options.overrideIndexBitwidth(indexBitwidth);
+
+    LLVMTypeConverter typeConverter(&getContext(), options,
+                                    &dataLayoutAnalysis);
+    RewritePatternSet patterns(&getContext());
+    populateMemRefToLLVMConversionPatterns(typeConverter, patterns);
+    LLVMConversionTarget target(getContext());
+    target.addLegalOp<LLVM::DialectCastOp, FuncOp>();
+    if (failed(applyPartialConversion(op, target, std::move(patterns))))
+      signalPassFailure();
+  }
+};
+} // namespace
+
+std::unique_ptr<Pass> mlir::createMemRefToLLVMPass() {
+  return std::make_unique<MemRefToLLVMPass>();
+}

diff  --git a/mlir/lib/Conversion/OpenACCToLLVM/CMakeLists.txt b/mlir/lib/Conversion/OpenACCToLLVM/CMakeLists.txt
index 83693c1eda668..b72afe846d1f5 100644
--- a/mlir/lib/Conversion/OpenACCToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/OpenACCToLLVM/CMakeLists.txt
@@ -10,7 +10,7 @@ add_mlir_conversion_library(MLIROpenACCToLLVM
   LINK_LIBS PUBLIC
   MLIRIR
   MLIRLLVMIR
+  MLIRMemRefToLLVM
   MLIROpenACC
-  MLIRStandardToLLVM
   MLIRTransforms
   )

diff  --git a/mlir/lib/Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp b/mlir/lib/Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp
index fce5ed0d1295b..b4f7e73ed28c5 100644
--- a/mlir/lib/Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp
@@ -7,9 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "../PassDetail.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/OpenACCToLLVM/ConvertOpenACCToLLVM.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Builders.h"
 
 using namespace mlir;
 

diff  --git a/mlir/lib/Conversion/OpenMPToLLVM/CMakeLists.txt b/mlir/lib/Conversion/OpenMPToLLVM/CMakeLists.txt
index e0774746960f9..cc50908ed2d44 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/OpenMPToLLVM/CMakeLists.txt
@@ -15,6 +15,7 @@ add_mlir_conversion_library(MLIROpenMPToLLVM
   MLIRIR
   MLIRLLVMCommonConversion
   MLIRLLVMIR
+  MLIRMemRefToLLVM
   MLIROpenMP
   MLIRStandardToLLVM
   MLIRTransforms

diff  --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 3a9eff6ead6d3..bf725c242811a 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -9,7 +9,9 @@
 #include "mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h"
 
 #include "../PassDetail.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -62,6 +64,7 @@ void ConvertOpenMPToLLVMPass::runOnOperation() {
   // Convert to OpenMP operations with LLVM IR dialect
   RewritePatternSet patterns(&getContext());
   LLVMTypeConverter converter(&getContext());
+  populateMemRefToLLVMConversionPatterns(converter, patterns);
   populateStdToLLVMConversionPatterns(converter, patterns);
   populateOpenMPToLLVMConversionPatterns(converter, patterns);
 

diff  --git a/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt b/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt
index f8f06d5871554..f8c811f71f7b9 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt
@@ -16,6 +16,7 @@ add_mlir_conversion_library(MLIRSPIRVToLLVM
   MLIRSPIRVUtils
   MLIRLLVMCommonConversion
   MLIRLLVMIR
+  MLIRMemRefToLLVM
   MLIRStandardToLLVM
   MLIRIR
   MLIRTransforms

diff  --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp
index 4c1486dbdd253..354a0a62934b3 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp
+++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp
@@ -13,7 +13,9 @@
 
 #include "../PassDetail.h"
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVM.h"
 #include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
@@ -282,6 +284,7 @@ class LowerHostCodeToLLVM
     auto *context = module.getContext();
     RewritePatternSet patterns(context);
     LLVMTypeConverter typeConverter(context, options);
+    populateMemRefToLLVMConversionPatterns(typeConverter, patterns);
     populateStdToLLVMConversionPatterns(typeConverter, patterns);
     patterns.add<GPULaunchLowering>(typeConverter);
 

diff  --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
index 7913c09923030..108618602dde9 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
+++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
@@ -11,8 +11,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"

diff  --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.cpp
index 293128c8446bd..4c0f558157444 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.cpp
+++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.cpp
@@ -12,9 +12,8 @@
 
 #include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h"
 #include "../PassDetail.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
 

diff --git a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
index 4b50250dd8bc4..b335c0a0642d1 100644
--- a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
@@ -17,6 +17,5 @@ add_mlir_conversion_library(MLIRStandardToLLVM
   MLIRLLVMCommonConversion
   MLIRLLVMIR
   MLIRMath
-  MLIRMemRef
   MLIRTransforms
   )

diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index 4ec3c70568765..70b7adb9c7ddb 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -13,6 +13,7 @@
 
 #include "../PassDetail.h"
 #include "mlir/Analysis/DataLayoutAnalysis.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
 #include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/LLVMCommon/VectorPattern.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
@@ -20,7 +21,6 @@
 #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/IR/Attributes.h"
@@ -30,7 +30,6 @@
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/IR/TypeUtilities.h"
-#include "mlir/Interfaces/DataLayoutInterfaces.h"
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/DialectConversion.h"
@@ -499,226 +498,6 @@ struct ConstantOpLowering : public ConvertOpToLLVMPattern<ConstantOp> {
   }
 };
 
-struct AllocOpLowering : public AllocLikeOpLLVMLowering {
-  AllocOpLowering(LLVMTypeConverter &converter)
-      : AllocLikeOpLLVMLowering(memref::AllocOp::getOperationName(),
-                                converter) {}
-
-  std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
-                                          Location loc, Value sizeBytes,
-                                          Operation *op) const override {
-    // Heap allocations.
-    memref::AllocOp allocOp = cast<memref::AllocOp>(op);
-    MemRefType memRefType = allocOp.getType();
-
-    Value alignment;
-    if (auto alignmentAttr = allocOp.alignment()) {
-      alignment = createIndexConstant(rewriter, loc, *alignmentAttr);
-    } else if (!memRefType.getElementType().isSignlessIntOrIndexOrFloat()) {
-      // In the case where no alignment is specified, we may want to override
-      // `malloc`'s behavior. `malloc` typically aligns to the size of the
-      // biggest scalar on the target hardware. For non-scalars, use the
-      // natural alignment of the LLVM type given by the LLVM DataLayout.
-      alignment = getSizeInBytes(loc, memRefType.getElementType(), rewriter);
-    }
-
-    if (alignment) {
-      // Adjust the allocation size to consider alignment.
-      sizeBytes = rewriter.create<LLVM::AddOp>(loc, sizeBytes, alignment);
-    }
-
-    // Allocate the underlying buffer and store a pointer to it in the MemRef
-    // descriptor.
-    Type elementPtrType = this->getElementPtrType(memRefType);
-    auto allocFuncOp = LLVM::lookupOrCreateMallocFn(
-        allocOp->getParentOfType<ModuleOp>(), getIndexType());
-    auto results = createLLVMCall(rewriter, loc, allocFuncOp, {sizeBytes},
-                                  getVoidPtrType());
-    Value allocatedPtr =
-        rewriter.create<LLVM::BitcastOp>(loc, elementPtrType, results[0]);
-
-    Value alignedPtr = allocatedPtr;
-    if (alignment) {
-      // Compute the aligned type pointer.
-      Value allocatedInt =
-          rewriter.create<LLVM::PtrToIntOp>(loc, getIndexType(), allocatedPtr);
-      Value alignmentInt =
-          createAligned(rewriter, loc, allocatedInt, alignment);
-      alignedPtr =
-          rewriter.create<LLVM::IntToPtrOp>(loc, elementPtrType, alignmentInt);
-    }
-
-    return std::make_tuple(allocatedPtr, alignedPtr);
-  }
-};
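
The scheme above over-allocates by `alignment` bytes and rounds the raw pointer up, keeping both pointers in the descriptor: the raw one for `free`, the rounded one for access. The same arithmetic as a standalone C++ sketch (illustrative names, error handling elided):

#include <cstdint>
#include <cstdlib>

// Returns the aligned (access) pointer; *allocatedOut receives the raw
// pointer that must later be passed to free().
void *allocAligned(std::size_t sizeBytes, std::size_t alignment,
                   void **allocatedOut) {
  void *allocated = std::malloc(sizeBytes + alignment); // padded size
  auto raw = reinterpret_cast<std::uintptr_t>(allocated);
  // Round up to the next multiple of `alignment`.
  std::uintptr_t aligned = ((raw + alignment - 1) / alignment) * alignment;
  *allocatedOut = allocated;
  return reinterpret_cast<void *>(aligned);
}
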
-
-struct AlignedAllocOpLowering : public AllocLikeOpLLVMLowering {
-  AlignedAllocOpLowering(LLVMTypeConverter &converter)
-      : AllocLikeOpLLVMLowering(memref::AllocOp::getOperationName(),
-                                converter) {}
-
-  /// Returns the memref's element size in bytes using the data layout active at
-  /// `op`.
-  // TODO: there are other places where this is used. Expose publicly?
-  unsigned getMemRefEltSizeInBytes(MemRefType memRefType, Operation *op) const {
-    const DataLayout *layout = &defaultLayout;
-    if (const DataLayoutAnalysis *analysis =
-            getTypeConverter()->getDataLayoutAnalysis()) {
-      layout = &analysis->getAbove(op);
-    }
-    Type elementType = memRefType.getElementType();
-    if (auto memRefElementType = elementType.dyn_cast<MemRefType>())
-      return getTypeConverter()->getMemRefDescriptorSize(memRefElementType,
-                                                         *layout);
-    if (auto memRefElementType = elementType.dyn_cast<UnrankedMemRefType>())
-      return getTypeConverter()->getUnrankedMemRefDescriptorSize(
-          memRefElementType, *layout);
-    return layout->getTypeSize(elementType);
-  }
-
-  /// Returns true if the memref size in bytes is known to be a multiple of
-  /// factor assuming the data layout active at `op`.
-  bool isMemRefSizeMultipleOf(MemRefType type, uint64_t factor,
-                              Operation *op) const {
-    uint64_t sizeDivisor = getMemRefEltSizeInBytes(type, op);
-    for (unsigned i = 0, e = type.getRank(); i < e; i++) {
-      if (type.isDynamic(type.getDimSize(i)))
-        continue;
-      sizeDivisor = sizeDivisor * type.getDimSize(i);
-    }
-    return sizeDivisor % factor == 0;
-  }
-
-  /// Returns the alignment to be used for the allocation call itself.
-  /// aligned_alloc requires the alignment to be a power of two and the
-  /// allocation size to be a multiple of that alignment.
-  int64_t getAllocationAlignment(memref::AllocOp allocOp) const {
-    if (Optional<uint64_t> alignment = allocOp.alignment())
-      return *alignment;
-
-    // Whenever we don't have an alignment set, we will use one consistent
-    // with the element type; since the alignment has to be a power of two,
-    // we bump the element size to the next power of two if it is not one
-    // already.
-    auto eltSizeBytes = getMemRefEltSizeInBytes(allocOp.getType(), allocOp);
-    return std::max(kMinAlignedAllocAlignment,
-                    llvm::PowerOf2Ceil(eltSizeBytes));
-  }
-
-  std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
-                                          Location loc, Value sizeBytes,
-                                          Operation *op) const override {
-    // Heap allocations.
-    memref::AllocOp allocOp = cast<memref::AllocOp>(op);
-    MemRefType memRefType = allocOp.getType();
-    int64_t alignment = getAllocationAlignment(allocOp);
-    Value allocAlignment = createIndexConstant(rewriter, loc, alignment);
-
-    // aligned_alloc requires size to be a multiple of alignment; we will pad
-    // the size to the next multiple if necessary.
-    if (!isMemRefSizeMultipleOf(memRefType, alignment, op))
-      sizeBytes = createAligned(rewriter, loc, sizeBytes, allocAlignment);
-
-    Type elementPtrType = this->getElementPtrType(memRefType);
-    auto allocFuncOp = LLVM::lookupOrCreateAlignedAllocFn(
-        allocOp->getParentOfType<ModuleOp>(), getIndexType());
-    auto results =
-        createLLVMCall(rewriter, loc, allocFuncOp, {allocAlignment, sizeBytes},
-                       getVoidPtrType());
-    Value allocatedPtr =
-        rewriter.create<LLVM::BitcastOp>(loc, elementPtrType, results[0]);
-
-    return std::make_tuple(allocatedPtr, allocatedPtr);
-  }
-
-  /// The minimum alignment to use with aligned_alloc (has to be a power of 2).
-  static constexpr uint64_t kMinAlignedAllocAlignment = 16UL;
-
-  /// Default layout to use in absence of the corresponding analysis.
-  DataLayout defaultLayout;
-};
-
-// Out-of-line definition, required until C++17.
-constexpr uint64_t AlignedAllocOpLowering::kMinAlignedAllocAlignment;
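
The fallback alignment chosen above is easy to check by hand. A standalone sketch using the same llvm::PowerOf2Ceil helper (the function name here is illustrative):

#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cstdint>

// No alignment attribute: bump the element size to the next power of two,
// but never drop below the aligned_alloc minimum of 16.
uint64_t chooseAllocAlignment(uint64_t eltSizeBytes) {
  constexpr uint64_t kMinAlignedAllocAlignment = 16;
  return std::max(kMinAlignedAllocAlignment, llvm::PowerOf2Ceil(eltSizeBytes));
}
// chooseAllocAlignment(4) == 16, chooseAllocAlignment(24) == 32.
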
-
-struct AllocaOpLowering : public AllocLikeOpLLVMLowering {
-  AllocaOpLowering(LLVMTypeConverter &converter)
-      : AllocLikeOpLLVMLowering(memref::AllocaOp::getOperationName(),
-                                converter) {}
-
-  /// Allocates the underlying buffer directly on the stack. `alloca` already
-  /// yields a pointer to the element type with the requested alignment, so
-  /// the allocated and aligned pointers coincide.
-  std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
-                                          Location loc, Value sizeBytes,
-                                          Operation *op) const override {
-
-    // With alloca, used for stack allocations, one gets a pointer to the
-    // element type right away.
-    auto allocaOp = cast<memref::AllocaOp>(op);
-    auto elementPtrType = this->getElementPtrType(allocaOp.getType());
-
-    auto allocatedElementPtr = rewriter.create<LLVM::AllocaOp>(
-        loc, elementPtrType, sizeBytes,
-        allocaOp.alignment() ? *allocaOp.alignment() : 0);
-
-    return std::make_tuple(allocatedElementPtr, allocatedElementPtr);
-  }
-};
-
-struct AllocaScopeOpLowering
-    : public ConvertOpToLLVMPattern<memref::AllocaScopeOp> {
-  using ConvertOpToLLVMPattern<memref::AllocaScopeOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(memref::AllocaScopeOp allocaScopeOp, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    OpBuilder::InsertionGuard guard(rewriter);
-    Location loc = allocaScopeOp.getLoc();
-
-    // Split the current block before the AllocaScopeOp to create the inlining
-    // point.
-    auto *currentBlock = rewriter.getInsertionBlock();
-    auto *remainingOpsBlock =
-        rewriter.splitBlock(currentBlock, rewriter.getInsertionPoint());
-    Block *continueBlock;
-    if (allocaScopeOp.getNumResults() == 0) {
-      continueBlock = remainingOpsBlock;
-    } else {
-      continueBlock = rewriter.createBlock(remainingOpsBlock,
-                                           allocaScopeOp.getResultTypes());
-      rewriter.create<BranchOp>(loc, remainingOpsBlock);
-    }
-
-    // Inline body region.
-    Block *beforeBody = &allocaScopeOp.bodyRegion().front();
-    Block *afterBody = &allocaScopeOp.bodyRegion().back();
-    rewriter.inlineRegionBefore(allocaScopeOp.bodyRegion(), continueBlock);
-
-    // Save stack and then branch into the body of the region.
-    rewriter.setInsertionPointToEnd(currentBlock);
-    auto stackSaveOp =
-        rewriter.create<LLVM::StackSaveOp>(loc, getVoidPtrType());
-    rewriter.create<BranchOp>(loc, beforeBody);
-
-    // Replace the alloca_scope return with a branch that jumps out of the
-    // body; the stack is restored before leaving the body region.
-    rewriter.setInsertionPointToEnd(afterBody);
-    auto returnOp =
-        cast<memref::AllocaScopeReturnOp>(afterBody->getTerminator());
-    auto branchOp = rewriter.replaceOpWithNewOp<BranchOp>(
-        returnOp, continueBlock, returnOp.results());
-
-    // Insert stack restore before jumping out of the body of the region.
-    rewriter.setInsertionPoint(branchOp);
-    rewriter.create<LLVM::StackRestoreOp>(loc, stackSaveOp);
-
-    // Replace the op with values returned from the body region.
-    rewriter.replaceOp(allocaScopeOp, continueBlock->getArguments());
-
-    return success();
-  }
-};
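
The LLVM::StackSaveOp / LLVM::StackRestoreOp bracket exists because LLVM-level allocas are only reclaimed on function return. A small C++ analogue of the failure mode it prevents, using the non-standard alloca for illustration:

#include <alloca.h> // non-standard; provided via <malloc.h> on some platforms
#include <cstring>

void worker(int n) {
  for (int i = 0; i < n; ++i) {
    // Without a save/restore bracket around each iteration, every alloca
    // accumulates until the function returns -- the lowering above restores
    // the stack pointer when control leaves the scope's body instead.
    void *scratch = alloca(1024);
    std::memset(scratch, 0, 1024);
  }
}
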
-
 /// Copies the shaped descriptor part to (if `toDynamic` is set) or from
 /// (otherwise) the dynamically allocated memory for any operands that were
 /// unranked descriptors originally.
@@ -877,130 +656,6 @@ struct CallIndirectOpLowering : public CallOpInterfaceLowering<CallIndirectOp> {
   using Super::Super;
 };
 
-// A `dealloc` is converted into a call to `free` on the underlying data buffer.
-// The memref descriptor being an SSA value, there is no need to clean it up
-// in any way.
-struct DeallocOpLowering : public ConvertOpToLLVMPattern<memref::DeallocOp> {
-  using ConvertOpToLLVMPattern<memref::DeallocOp>::ConvertOpToLLVMPattern;
-
-  explicit DeallocOpLowering(LLVMTypeConverter &converter)
-      : ConvertOpToLLVMPattern<memref::DeallocOp>(converter) {}
-
-  LogicalResult
-  matchAndRewrite(memref::DeallocOp op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    assert(operands.size() == 1 && "dealloc takes one operand");
-    memref::DeallocOp::Adaptor transformed(operands);
-
-    // Insert the `free` declaration if it is not already present.
-    auto freeFunc = LLVM::lookupOrCreateFreeFn(op->getParentOfType<ModuleOp>());
-    MemRefDescriptor memref(transformed.memref());
-    Value casted = rewriter.create<LLVM::BitcastOp>(
-        op.getLoc(), getVoidPtrType(),
-        memref.allocatedPtr(rewriter, op.getLoc()));
-    rewriter.replaceOpWithNewOp<LLVM::CallOp>(
-        op, TypeRange(), rewriter.getSymbolRefAttr(freeFunc), casted);
-    return success();
-  }
-};
-
-/// Returns the LLVM type of the global variable given the memref type `type`.
-static Type convertGlobalMemrefTypeToLLVM(MemRefType type,
-                                          LLVMTypeConverter &typeConverter) {
-  // The LLVM type for a global memref will be a multi-dimensional array. For
-  // declarations or uninitialized global memrefs, we could potentially flatten
-  // this to a 1D array. However, for memref.global's with an initial value,
-  // we do not intend to flatten the ElementsAttribute when going from std ->
-  // LLVM dialect, so the LLVM type needs to be a multi-dimensional array.
-  Type elementType = typeConverter.convertType(type.getElementType());
-  Type arrayTy = elementType;
-  // The shape has the outermost dim at index 0, so we need to walk it backwards.
-  for (int64_t dim : llvm::reverse(type.getShape()))
-    arrayTy = LLVM::LLVMArrayType::get(arrayTy, dim);
-  return arrayTy;
-}
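
The fold above builds the innermost array first, so memref<4x8xf32> becomes !llvm.array<4 x array<8 x f32>>. The same reverse fold over a plain shape vector, as an illustrative sketch:

#include <cstdint>
#include <string>
#include <vector>

// globalArrayTypeName({4, 8}, "f32") == "!llvm.array<4 x array<8 x f32>>".
std::string globalArrayTypeName(const std::vector<int64_t> &shape,
                                std::string elementType) {
  std::string type = std::move(elementType);
  for (auto dim = shape.rbegin(); dim != shape.rend(); ++dim)
    type = "array<" + std::to_string(*dim) + " x " + type + ">";
  return "!llvm." + type;
}
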
-
-/// GlobalMemrefOp is lowered to an LLVM global variable.
-struct GlobalMemrefOpLowering
-    : public ConvertOpToLLVMPattern<memref::GlobalOp> {
-  using ConvertOpToLLVMPattern<memref::GlobalOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(memref::GlobalOp global, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    MemRefType type = global.type().cast<MemRefType>();
-    if (!isConvertibleAndHasIdentityMaps(type))
-      return failure();
-
-    Type arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter());
-
-    LLVM::Linkage linkage =
-        global.isPublic() ? LLVM::Linkage::External : LLVM::Linkage::Private;
-
-    Attribute initialValue = nullptr;
-    if (!global.isExternal() && !global.isUninitialized()) {
-      auto elementsAttr = global.initial_value()->cast<ElementsAttr>();
-      initialValue = elementsAttr;
-
-      // For scalar memrefs, the global variable created is of the element type,
-      // so unpack the elements attribute to extract the value.
-      if (type.getRank() == 0)
-        initialValue = elementsAttr.getValue({});
-    }
-
-    rewriter.replaceOpWithNewOp<LLVM::GlobalOp>(
-        global, arrayTy, global.constant(), linkage, global.sym_name(),
-        initialValue, /*alignment=*/0, type.getMemorySpaceAsInt());
-    return success();
-  }
-};
-
-/// GetGlobalMemrefOp is lowered into a MemRef descriptor with the pointer to
-/// the first element stashed into the descriptor. It derives from
-/// `AllocLikeOpLLVMLowering` to reuse the MemRef descriptor construction.
-struct GetGlobalMemrefOpLowering : public AllocLikeOpLLVMLowering {
-  GetGlobalMemrefOpLowering(LLVMTypeConverter &converter)
-      : AllocLikeOpLLVMLowering(memref::GetGlobalOp::getOperationName(),
-                                converter) {}
-
-  /// Buffer "allocation" for memref.get_global op is getting the address of
-  /// the global variable referenced.
-  std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
-                                          Location loc, Value sizeBytes,
-                                          Operation *op) const override {
-    auto getGlobalOp = cast<memref::GetGlobalOp>(op);
-    MemRefType type = getGlobalOp.result().getType().cast<MemRefType>();
-    unsigned memSpace = type.getMemorySpaceAsInt();
-
-    Type arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter());
-    auto addressOf = rewriter.create<LLVM::AddressOfOp>(
-        loc, LLVM::LLVMPointerType::get(arrayTy, memSpace), getGlobalOp.name());
-
-    // Get the address of the first element in the array by creating a GEP with
-    // the address of the global variable as the base and (rank + 1) zero indices.
-    Type elementType = typeConverter->convertType(type.getElementType());
-    Type elementPtrType = LLVM::LLVMPointerType::get(elementType, memSpace);
-
-    SmallVector<Value, 4> operands = {addressOf};
-    operands.insert(operands.end(), type.getRank() + 1,
-                    createIndexConstant(rewriter, loc, 0));
-    auto gep = rewriter.create<LLVM::GEPOp>(loc, elementPtrType, operands);
-
-    // We do not expect the memref obtained using `memref.get_global` to ever
-    // be deallocated. Set the allocated pointer to a known bad value to help
-    // debug if that ever happens.
-    auto intPtrType = getIntPtrType(memSpace);
-    Value deadBeefConst =
-        createIndexAttrConstant(rewriter, op->getLoc(), intPtrType, 0xdeadbeef);
-    auto deadBeefPtr =
-        rewriter.create<LLVM::IntToPtrOp>(loc, elementPtrType, deadBeefConst);
-
-    // Both allocated and aligned pointers are the same. We could potentially
-    // stash a nullptr for the allocated pointer since we do not expect any
-    // dealloc.
-    return std::make_tuple(deadBeefPtr, gep);
-  }
-};
-
 // An `expm1` is converted into `exp - 1`.
 struct ExpM1OpLowering : public ConvertOpToLLVMPattern<math::ExpM1Op> {
   using ConvertOpToLLVMPattern<math::ExpM1Op>::ConvertOpToLLVMPattern;
@@ -1164,417 +819,6 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern<math::RsqrtOp> {
   }
 };
 
-struct MemRefCastOpLowering : public ConvertOpToLLVMPattern<memref::CastOp> {
-  using ConvertOpToLLVMPattern<memref::CastOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult match(memref::CastOp memRefCastOp) const override {
-    Type srcType = memRefCastOp.getOperand().getType();
-    Type dstType = memRefCastOp.getType();
-
-    // memref::CastOp reduces to a bitcast in the ranked MemRef case and can be
-    // used for type erasure. For now it must preserve the underlying element
-    // type and requires the source and result types to have the same rank.
-    // Therefore, perform a sanity check that the underlying structs are the
-    // same. Once the op semantics are relaxed, we can revisit.
-    if (srcType.isa<MemRefType>() && dstType.isa<MemRefType>())
-      return success(typeConverter->convertType(srcType) ==
-                     typeConverter->convertType(dstType));
-
-    // At least one of the operands has an unranked type.
-    assert(srcType.isa<UnrankedMemRefType>() ||
-           dstType.isa<UnrankedMemRefType>());
-
-    // Unranked-to-unranked casts are disallowed.
-    return !(srcType.isa<UnrankedMemRefType>() &&
-             dstType.isa<UnrankedMemRefType>())
-               ? success()
-               : failure();
-  }
-
-  void rewrite(memref::CastOp memRefCastOp, ArrayRef<Value> operands,
-               ConversionPatternRewriter &rewriter) const override {
-    memref::CastOp::Adaptor transformed(operands);
-
-    auto srcType = memRefCastOp.getOperand().getType();
-    auto dstType = memRefCastOp.getType();
-    auto targetStructType = typeConverter->convertType(memRefCastOp.getType());
-    auto loc = memRefCastOp.getLoc();
-
-    // For ranked/ranked case, just keep the original descriptor.
-    if (srcType.isa<MemRefType>() && dstType.isa<MemRefType>())
-      return rewriter.replaceOp(memRefCastOp, {transformed.source()});
-
-    if (srcType.isa<MemRefType>() && dstType.isa<UnrankedMemRefType>()) {
-      // Casting ranked to unranked memref type
-      // Set the rank in the destination from the memref type
-      // Allocate space on the stack and copy the src memref descriptor
-      // Set the ptr in the destination to the stack space
-      auto srcMemRefType = srcType.cast<MemRefType>();
-      int64_t rank = srcMemRefType.getRank();
-      // ptr = AllocaOp sizeof(MemRefDescriptor)
-      auto ptr = getTypeConverter()->promoteOneMemRefDescriptor(
-          loc, transformed.source(), rewriter);
-      // voidptr = BitCastOp srcType* to void*
-      auto voidPtr =
-          rewriter.create<LLVM::BitcastOp>(loc, getVoidPtrType(), ptr)
-              .getResult();
-      // rank = ConstantOp srcRank
-      auto rankVal = rewriter.create<LLVM::ConstantOp>(
-          loc, typeConverter->convertType(rewriter.getIntegerType(64)),
-          rewriter.getI64IntegerAttr(rank));
-      // undef = UndefOp
-      UnrankedMemRefDescriptor memRefDesc =
-          UnrankedMemRefDescriptor::undef(rewriter, loc, targetStructType);
-      // d1 = InsertValueOp undef, rank, 0
-      memRefDesc.setRank(rewriter, loc, rankVal);
-      // d2 = InsertValueOp d1, voidptr, 1
-      memRefDesc.setMemRefDescPtr(rewriter, loc, voidPtr);
-      rewriter.replaceOp(memRefCastOp, (Value)memRefDesc);
-
-    } else if (srcType.isa<UnrankedMemRefType>() && dstType.isa<MemRefType>()) {
-      // Casting from unranked type to ranked.
-      // The operation is assumed to be doing a correct cast. If the destination
-      // type mismatches the unranked type, the behavior is undefined.
-      UnrankedMemRefDescriptor memRefDesc(transformed.source());
-      // ptr = ExtractValueOp src, 1
-      auto ptr = memRefDesc.memRefDescPtr(rewriter, loc);
-      // castPtr = BitCastOp i8* to structTy*
-      auto castPtr =
-          rewriter
-              .create<LLVM::BitcastOp>(
-                  loc, LLVM::LLVMPointerType::get(targetStructType), ptr)
-              .getResult();
-      // struct = LoadOp castPtr
-      auto loadOp = rewriter.create<LLVM::LoadOp>(loc, castPtr);
-      rewriter.replaceOp(memRefCastOp, loadOp.getResult());
-    } else {
-      llvm_unreachable("Unsupported unranked memref to unranked memref cast");
-    }
-  }
-};
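
In the ranked-to-unranked branch, the descriptor that gets built has only two fields. A plain C++ sketch of that layout (not the actual MLIR helper class):

#include <cstdint>

struct UnrankedDescriptor {
  int64_t rank;     // taken from the static rank of the source memref type
  void *rankedDesc; // i8* view of the stack-promoted ranked descriptor
};
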
-
-struct MemRefCopyOpLowering : public ConvertOpToLLVMPattern<memref::CopyOp> {
-  using ConvertOpToLLVMPattern<memref::CopyOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(memref::CopyOp op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto loc = op.getLoc();
-    memref::CopyOp::Adaptor adaptor(operands);
-    auto srcType = op.source().getType().cast<BaseMemRefType>();
-    auto targetType = op.target().getType().cast<BaseMemRefType>();
-
-    // First make sure we have an unranked memref descriptor representation.
-    auto makeUnranked = [&, this](Value ranked, BaseMemRefType type) {
-      auto rank = rewriter.create<LLVM::ConstantOp>(
-          loc, getIndexType(), rewriter.getIndexAttr(type.getRank()));
-      auto *typeConverter = getTypeConverter();
-      auto ptr =
-          typeConverter->promoteOneMemRefDescriptor(loc, ranked, rewriter);
-      auto voidPtr =
-          rewriter.create<LLVM::BitcastOp>(loc, getVoidPtrType(), ptr)
-              .getResult();
-      auto unrankedType =
-          UnrankedMemRefType::get(type.getElementType(), type.getMemorySpace());
-      return UnrankedMemRefDescriptor::pack(rewriter, loc, *typeConverter,
-                                            unrankedType,
-                                            ValueRange{rank, voidPtr});
-    };
-
-    Value unrankedSource = srcType.hasRank()
-                               ? makeUnranked(adaptor.source(), srcType)
-                               : adaptor.source();
-    Value unrankedTarget = targetType.hasRank()
-                               ? makeUnranked(adaptor.target(), targetType)
-                               : adaptor.target();
-
-    // Now promote the unranked descriptors to the stack.
-    auto one = rewriter.create<LLVM::ConstantOp>(loc, getIndexType(),
-                                                 rewriter.getIndexAttr(1));
-    auto promote = [&](Value desc) {
-      auto ptrType = LLVM::LLVMPointerType::get(desc.getType());
-      auto allocated =
-          rewriter.create<LLVM::AllocaOp>(loc, ptrType, ValueRange{one});
-      rewriter.create<LLVM::StoreOp>(loc, desc, allocated);
-      return allocated;
-    };
-
-    auto sourcePtr = promote(unrankedSource);
-    auto targetPtr = promote(unrankedTarget);
-
-    auto elemSize = rewriter.create<LLVM::ConstantOp>(
-        loc, getIndexType(),
-        rewriter.getIndexAttr(srcType.getElementTypeBitWidth() / 8));
-    auto copyFn = LLVM::lookupOrCreateMemRefCopyFn(
-        op->getParentOfType<ModuleOp>(), getIndexType(), sourcePtr.getType());
-    rewriter.create<LLVM::CallOp>(loc, copyFn,
-                                  ValueRange{elemSize, sourcePtr, targetPtr});
-    rewriter.eraseOp(op);
-
-    return success();
-  }
-};
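
Rather than emitting an inline loop nest, the copy lowering calls into a runtime function with the element size and the two stack-promoted unranked descriptors. An illustrative C-level view of that call; the callee is whatever symbol lookupOrCreateMemRefCopyFn binds to, assumed here to be named memrefCopy:

#include <cstdint>

struct UnrankedDescriptor; // rank + pointer to a ranked descriptor, as above

// Assumed runtime entry point: copies element-by-element between two
// (possibly strided) memrefs described by the unranked descriptors.
extern "C" void memrefCopy(int64_t elemSizeBytes, UnrankedDescriptor *src,
                           UnrankedDescriptor *dst);
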
-
-/// Extracts allocated, aligned pointers and offset from a ranked or unranked
-/// memref type. In unranked case, the fields are extracted from the underlying
-/// ranked descriptor.
-static void extractPointersAndOffset(Location loc,
-                                     ConversionPatternRewriter &rewriter,
-                                     LLVMTypeConverter &typeConverter,
-                                     Value originalOperand,
-                                     Value convertedOperand,
-                                     Value *allocatedPtr, Value *alignedPtr,
-                                     Value *offset = nullptr) {
-  Type operandType = originalOperand.getType();
-  if (operandType.isa<MemRefType>()) {
-    MemRefDescriptor desc(convertedOperand);
-    *allocatedPtr = desc.allocatedPtr(rewriter, loc);
-    *alignedPtr = desc.alignedPtr(rewriter, loc);
-    if (offset != nullptr)
-      *offset = desc.offset(rewriter, loc);
-    return;
-  }
-
-  unsigned memorySpace =
-      operandType.cast<UnrankedMemRefType>().getMemorySpaceAsInt();
-  Type elementType = operandType.cast<UnrankedMemRefType>().getElementType();
-  Type llvmElementType = typeConverter.convertType(elementType);
-  Type elementPtrPtrType = LLVM::LLVMPointerType::get(
-      LLVM::LLVMPointerType::get(llvmElementType, memorySpace));
-
-  // Extract pointer to the underlying ranked memref descriptor and cast it to
-  // ElemType**.
-  UnrankedMemRefDescriptor unrankedDesc(convertedOperand);
-  Value underlyingDescPtr = unrankedDesc.memRefDescPtr(rewriter, loc);
-
-  *allocatedPtr = UnrankedMemRefDescriptor::allocatedPtr(
-      rewriter, loc, underlyingDescPtr, elementPtrPtrType);
-  *alignedPtr = UnrankedMemRefDescriptor::alignedPtr(
-      rewriter, loc, typeConverter, underlyingDescPtr, elementPtrPtrType);
-  if (offset != nullptr) {
-    *offset = UnrankedMemRefDescriptor::offset(
-        rewriter, loc, typeConverter, underlyingDescPtr, elementPtrPtrType);
-  }
-}
-
-struct MemRefReinterpretCastOpLowering
-    : public ConvertOpToLLVMPattern<memref::ReinterpretCastOp> {
-  using ConvertOpToLLVMPattern<
-      memref::ReinterpretCastOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(memref::ReinterpretCastOp castOp, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    memref::ReinterpretCastOp::Adaptor adaptor(operands,
-                                               castOp->getAttrDictionary());
-    Type srcType = castOp.source().getType();
-
-    Value descriptor;
-    if (failed(convertSourceMemRefToDescriptor(rewriter, srcType, castOp,
-                                               adaptor, &descriptor)))
-      return failure();
-    rewriter.replaceOp(castOp, {descriptor});
-    return success();
-  }
-
-private:
-  LogicalResult convertSourceMemRefToDescriptor(
-      ConversionPatternRewriter &rewriter, Type srcType,
-      memref::ReinterpretCastOp castOp,
-      memref::ReinterpretCastOp::Adaptor adaptor, Value *descriptor) const {
-    MemRefType targetMemRefType =
-        castOp.getResult().getType().cast<MemRefType>();
-    auto llvmTargetDescriptorTy = typeConverter->convertType(targetMemRefType)
-                                      .dyn_cast_or_null<LLVM::LLVMStructType>();
-    if (!llvmTargetDescriptorTy)
-      return failure();
-
-    // Create descriptor.
-    Location loc = castOp.getLoc();
-    auto desc = MemRefDescriptor::undef(rewriter, loc, llvmTargetDescriptorTy);
-
-    // Set allocated and aligned pointers.
-    Value allocatedPtr, alignedPtr;
-    extractPointersAndOffset(loc, rewriter, *getTypeConverter(),
-                             castOp.source(), adaptor.source(), &allocatedPtr,
-                             &alignedPtr);
-    desc.setAllocatedPtr(rewriter, loc, allocatedPtr);
-    desc.setAlignedPtr(rewriter, loc, alignedPtr);
-
-    // Set offset.
-    if (castOp.isDynamicOffset(0))
-      desc.setOffset(rewriter, loc, adaptor.offsets()[0]);
-    else
-      desc.setConstantOffset(rewriter, loc, castOp.getStaticOffset(0));
-
-    // Set sizes and strides.
-    unsigned dynSizeId = 0;
-    unsigned dynStrideId = 0;
-    for (unsigned i = 0, e = targetMemRefType.getRank(); i < e; ++i) {
-      if (castOp.isDynamicSize(i))
-        desc.setSize(rewriter, loc, i, adaptor.sizes()[dynSizeId++]);
-      else
-        desc.setConstantSize(rewriter, loc, i, castOp.getStaticSize(i));
-
-      if (castOp.isDynamicStride(i))
-        desc.setStride(rewriter, loc, i, adaptor.strides()[dynStrideId++]);
-      else
-        desc.setConstantStride(rewriter, loc, i, castOp.getStaticStride(i));
-    }
-    *descriptor = desc;
-    return success();
-  }
-};
-
-struct MemRefReshapeOpLowering
-    : public ConvertOpToLLVMPattern<memref::ReshapeOp> {
-  using ConvertOpToLLVMPattern<memref::ReshapeOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(memref::ReshapeOp reshapeOp, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto *op = reshapeOp.getOperation();
-    memref::ReshapeOp::Adaptor adaptor(operands, op->getAttrDictionary());
-    Type srcType = reshapeOp.source().getType();
-
-    Value descriptor;
-    if (failed(convertSourceMemRefToDescriptor(rewriter, srcType, reshapeOp,
-                                               adaptor, &descriptor)))
-      return failure();
-    rewriter.replaceOp(op, {descriptor});
-    return success();
-  }
-
-private:
-  LogicalResult
-  convertSourceMemRefToDescriptor(ConversionPatternRewriter &rewriter,
-                                  Type srcType, memref::ReshapeOp reshapeOp,
-                                  memref::ReshapeOp::Adaptor adaptor,
-                                  Value *descriptor) const {
-    // Conversion for statically-known shape args is performed via
-    // `memref.reinterpret_cast`.
-    auto shapeMemRefType = reshapeOp.shape().getType().cast<MemRefType>();
-    if (shapeMemRefType.hasStaticShape())
-      return failure();
-
-    // The shape is a rank-1 memref with unknown length.
-    Location loc = reshapeOp.getLoc();
-    MemRefDescriptor shapeDesc(adaptor.shape());
-    Value resultRank = shapeDesc.size(rewriter, loc, 0);
-
-    // Extract address space and element type.
-    auto targetType =
-        reshapeOp.getResult().getType().cast<UnrankedMemRefType>();
-    unsigned addressSpace = targetType.getMemorySpaceAsInt();
-    Type elementType = targetType.getElementType();
-
-    // Create the unranked memref descriptor that holds the ranked one. The
-    // inner descriptor is allocated on the stack.
-    auto targetDesc = UnrankedMemRefDescriptor::undef(
-        rewriter, loc, typeConverter->convertType(targetType));
-    targetDesc.setRank(rewriter, loc, resultRank);
-    SmallVector<Value, 4> sizes;
-    UnrankedMemRefDescriptor::computeSizes(rewriter, loc, *getTypeConverter(),
-                                           targetDesc, sizes);
-    Value underlyingDescPtr = rewriter.create<LLVM::AllocaOp>(
-        loc, getVoidPtrType(), sizes.front(), llvm::None);
-    targetDesc.setMemRefDescPtr(rewriter, loc, underlyingDescPtr);
-
-    // Extract pointers and offset from the source memref.
-    Value allocatedPtr, alignedPtr, offset;
-    extractPointersAndOffset(loc, rewriter, *getTypeConverter(),
-                             reshapeOp.source(), adaptor.source(),
-                             &allocatedPtr, &alignedPtr, &offset);
-
-    // Set pointers and offset.
-    Type llvmElementType = typeConverter->convertType(elementType);
-    auto elementPtrPtrType = LLVM::LLVMPointerType::get(
-        LLVM::LLVMPointerType::get(llvmElementType, addressSpace));
-    UnrankedMemRefDescriptor::setAllocatedPtr(rewriter, loc, underlyingDescPtr,
-                                              elementPtrPtrType, allocatedPtr);
-    UnrankedMemRefDescriptor::setAlignedPtr(rewriter, loc, *getTypeConverter(),
-                                            underlyingDescPtr,
-                                            elementPtrPtrType, alignedPtr);
-    UnrankedMemRefDescriptor::setOffset(rewriter, loc, *getTypeConverter(),
-                                        underlyingDescPtr, elementPtrPtrType,
-                                        offset);
-
-    // Use the offset pointer as base for further addressing. Copy over the new
-    // shape and compute strides. For this, we create a loop from rank-1 to 0.
-    Value targetSizesBase = UnrankedMemRefDescriptor::sizeBasePtr(
-        rewriter, loc, *getTypeConverter(), underlyingDescPtr,
-        elementPtrPtrType);
-    Value targetStridesBase = UnrankedMemRefDescriptor::strideBasePtr(
-        rewriter, loc, *getTypeConverter(), targetSizesBase, resultRank);
-    Value shapeOperandPtr = shapeDesc.alignedPtr(rewriter, loc);
-    Value oneIndex = createIndexConstant(rewriter, loc, 1);
-    Value resultRankMinusOne =
-        rewriter.create<LLVM::SubOp>(loc, resultRank, oneIndex);
-
-    Block *initBlock = rewriter.getInsertionBlock();
-    Type indexType = getTypeConverter()->getIndexType();
-    Block::iterator remainingOpsIt = std::next(rewriter.getInsertionPoint());
-
-    Block *condBlock = rewriter.createBlock(initBlock->getParent(), {},
-                                            {indexType, indexType});
-
-    // Iterate over the remaining ops in initBlock and move them to condBlock.
-    BlockAndValueMapping map;
-    for (auto it = remainingOpsIt, e = initBlock->end(); it != e; ++it) {
-      rewriter.clone(*it, map);
-      rewriter.eraseOp(&*it);
-    }
-
-    rewriter.setInsertionPointToEnd(initBlock);
-    rewriter.create<LLVM::BrOp>(loc, ValueRange({resultRankMinusOne, oneIndex}),
-                                condBlock);
-    rewriter.setInsertionPointToStart(condBlock);
-    Value indexArg = condBlock->getArgument(0);
-    Value strideArg = condBlock->getArgument(1);
-
-    Value zeroIndex = createIndexConstant(rewriter, loc, 0);
-    Value pred = rewriter.create<LLVM::ICmpOp>(
-        loc, IntegerType::get(rewriter.getContext(), 1),
-        LLVM::ICmpPredicate::sge, indexArg, zeroIndex);
-
-    Block *bodyBlock =
-        rewriter.splitBlock(condBlock, rewriter.getInsertionPoint());
-    rewriter.setInsertionPointToStart(bodyBlock);
-
-    // Copy size from shape to descriptor.
-    Type llvmIndexPtrType = LLVM::LLVMPointerType::get(indexType);
-    Value sizeLoadGep = rewriter.create<LLVM::GEPOp>(
-        loc, llvmIndexPtrType, shapeOperandPtr, ValueRange{indexArg});
-    Value size = rewriter.create<LLVM::LoadOp>(loc, sizeLoadGep);
-    UnrankedMemRefDescriptor::setSize(rewriter, loc, *getTypeConverter(),
-                                      targetSizesBase, indexArg, size);
-
-    // Write stride value and compute next one.
-    UnrankedMemRefDescriptor::setStride(rewriter, loc, *getTypeConverter(),
-                                        targetStridesBase, indexArg, strideArg);
-    Value nextStride = rewriter.create<LLVM::MulOp>(loc, strideArg, size);
-
-    // Decrement loop counter and branch back.
-    Value decrement = rewriter.create<LLVM::SubOp>(loc, indexArg, oneIndex);
-    rewriter.create<LLVM::BrOp>(loc, ValueRange({decrement, nextStride}),
-                                condBlock);
-
-    Block *remainder =
-        rewriter.splitBlock(bodyBlock, rewriter.getInsertionPoint());
-
-    // Hook up the cond exit to the remainder.
-    rewriter.setInsertionPointToEnd(condBlock);
-    rewriter.create<LLVM::CondBrOp>(loc, pred, bodyBlock, llvm::None, remainder,
-                                    llvm::None);
-
-    // Reset position to beginning of new remainder block.
-    rewriter.setInsertionPointToStart(remainder);
-
-    *descriptor = targetDesc;
-    return success();
-  }
-};
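
The cond/body blocks emitted above compute row-major strides from the copied sizes: the running stride starts at 1 in the innermost dimension and folds in each size while walking from rank - 1 down to 0. The same computation in plain C++, as a sketch:

#include <cstdint>
#include <vector>

// rowMajorStrides({2, 3, 4}) == {12, 4, 1}.
std::vector<int64_t> rowMajorStrides(const std::vector<int64_t> &sizes) {
  std::vector<int64_t> strides(sizes.size());
  int64_t running = 1;
  for (int64_t i = static_cast<int64_t>(sizes.size()) - 1; i >= 0; --i) {
    strides[i] = running; // innermost dimension gets stride 1
    running *= sizes[i];  // fold this dimension's size into the next stride
  }
  return strides;
}
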
-
 struct DialectCastOpLowering
     : public ConvertOpToLLVMPattern<LLVM::DialectCastOp> {
   using ConvertOpToLLVMPattern<LLVM::DialectCastOp>::ConvertOpToLLVMPattern;
@@ -1592,96 +836,6 @@ struct DialectCastOpLowering
   }
 };
 
-// A `dim` is converted to a constant for static sizes and to an access to the
-// size stored in the memref descriptor for dynamic sizes.
-struct DimOpLowering : public ConvertOpToLLVMPattern<memref::DimOp> {
-  using ConvertOpToLLVMPattern<memref::DimOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(memref::DimOp dimOp, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    Type operandType = dimOp.source().getType();
-    if (operandType.isa<UnrankedMemRefType>()) {
-      rewriter.replaceOp(dimOp, {extractSizeOfUnrankedMemRef(
-                                    operandType, dimOp, operands, rewriter)});
-
-      return success();
-    }
-    if (operandType.isa<MemRefType>()) {
-      rewriter.replaceOp(dimOp, {extractSizeOfRankedMemRef(
-                                    operandType, dimOp, operands, rewriter)});
-      return success();
-    }
-    llvm_unreachable("expected MemRefType or UnrankedMemRefType");
-  }
-
-private:
-  Value extractSizeOfUnrankedMemRef(Type operandType, memref::DimOp dimOp,
-                                    ArrayRef<Value> operands,
-                                    ConversionPatternRewriter &rewriter) const {
-    Location loc = dimOp.getLoc();
-    memref::DimOp::Adaptor transformed(operands);
-
-    auto unrankedMemRefType = operandType.cast<UnrankedMemRefType>();
-    auto scalarMemRefType =
-        MemRefType::get({}, unrankedMemRefType.getElementType());
-    unsigned addressSpace = unrankedMemRefType.getMemorySpaceAsInt();
-
-    // Extract pointer to the underlying ranked descriptor and bitcast it to a
-    // memref<element_type> descriptor pointer to minimize the number of GEP
-    // operations.
-    UnrankedMemRefDescriptor unrankedDesc(transformed.source());
-    Value underlyingRankedDesc = unrankedDesc.memRefDescPtr(rewriter, loc);
-    Value scalarMemRefDescPtr = rewriter.create<LLVM::BitcastOp>(
-        loc,
-        LLVM::LLVMPointerType::get(typeConverter->convertType(scalarMemRefType),
-                                   addressSpace),
-        underlyingRankedDesc);
-
-    // Get pointer to offset field of memref<element_type> descriptor.
-    Type indexPtrTy = LLVM::LLVMPointerType::get(
-        getTypeConverter()->getIndexType(), addressSpace);
-    Value two = rewriter.create<LLVM::ConstantOp>(
-        loc, typeConverter->convertType(rewriter.getI32Type()),
-        rewriter.getI32IntegerAttr(2));
-    Value offsetPtr = rewriter.create<LLVM::GEPOp>(
-        loc, indexPtrTy, scalarMemRefDescPtr,
-        ValueRange({createIndexConstant(rewriter, loc, 0), two}));
-
-    // The size value that we have to extract can be obtained using GEPOp with
-    // `dimOp.index() + 1` as the index argument.
-    Value idxPlusOne = rewriter.create<LLVM::AddOp>(
-        loc, createIndexConstant(rewriter, loc, 1), transformed.index());
-    Value sizePtr = rewriter.create<LLVM::GEPOp>(loc, indexPtrTy, offsetPtr,
-                                                 ValueRange({idxPlusOne}));
-    return rewriter.create<LLVM::LoadOp>(loc, sizePtr);
-  }
-
-  Value extractSizeOfRankedMemRef(Type operandType, memref::DimOp dimOp,
-                                  ArrayRef<Value> operands,
-                                  ConversionPatternRewriter &rewriter) const {
-    Location loc = dimOp.getLoc();
-    memref::DimOp::Adaptor transformed(operands);
-    // Take advantage of a constant index, if any.
-    MemRefType memRefType = operandType.cast<MemRefType>();
-    if (Optional<int64_t> index = dimOp.getConstantIndex()) {
-      int64_t i = index.getValue();
-      if (memRefType.isDynamicDim(i)) {
-        // Extract the dynamic size from the memref descriptor.
-        MemRefDescriptor descriptor(transformed.source());
-        return descriptor.size(rewriter, loc, i);
-      }
-      // Use constant for static size.
-      int64_t dimSize = memRefType.getDimSize(i);
-      return createIndexConstant(rewriter, loc, dimSize);
-    }
-    Value index = dimOp.index();
-    int64_t rank = memRefType.getRank();
-    MemRefDescriptor memrefDescriptor(transformed.source());
-    return memrefDescriptor.size(rewriter, loc, index, rank);
-  }
-};
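
The GEP arithmetic in the unranked case leans on the fixed layout of a ranked descriptor: two pointers, the offset, then the size and stride arrays. As a C++ sketch (N is the rank), viewing the offset field as the start of an index array puts sizes[i] at offsetPtr[1 + i], which is exactly the `dimOp.index() + 1` GEP above:

#include <cstdint>

template <typename T, int N>
struct RankedDescriptor {
  T *allocated;     // pointer passed to free()
  T *aligned;       // pointer used for element access
  int64_t offset;   // in element units
  int64_t sizes[N]; // sizes[i] sits at offsetPtr[1 + i]
  int64_t strides[N];
};
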
-
 struct RankOpLowering : public ConvertOpToLLVMPattern<RankOp> {
   using ConvertOpToLLVMPattern<RankOp>::ConvertOpToLLVMPattern;
 
@@ -1719,76 +873,6 @@ struct LoadStoreOpLowering : public ConvertOpToLLVMPattern<Derived> {
   }
 };
 
-// Load operation is lowered to obtaining a pointer to the indexed element
-// and loading it.
-struct LoadOpLowering : public LoadStoreOpLowering<memref::LoadOp> {
-  using Base::Base;
-
-  LogicalResult
-  matchAndRewrite(memref::LoadOp loadOp, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    memref::LoadOp::Adaptor transformed(operands);
-    auto type = loadOp.getMemRefType();
-
-    Value dataPtr =
-        getStridedElementPtr(loadOp.getLoc(), type, transformed.memref(),
-                             transformed.indices(), rewriter);
-    rewriter.replaceOpWithNewOp<LLVM::LoadOp>(loadOp, dataPtr);
-    return success();
-  }
-};
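
Both this load pattern and the store pattern below funnel through getStridedElementPtr, which linearizes the indices against the descriptor's offset and strides. The arithmetic, standalone and in element units (a sketch):

#include <cstdint>

// The element address is alignedPtr + linearIndex(...) elements.
int64_t linearIndex(int64_t offset, const int64_t *indices,
                    const int64_t *strides, int rank) {
  int64_t index = offset;
  for (int i = 0; i < rank; ++i)
    index += indices[i] * strides[i];
  return index;
}
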
-
-// Store operation is lowered to obtaining a pointer to the indexed element,
-// and storing the given value to it.
-struct StoreOpLowering : public LoadStoreOpLowering<memref::StoreOp> {
-  using Base::Base;
-
-  LogicalResult
-  matchAndRewrite(memref::StoreOp op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto type = op.getMemRefType();
-    memref::StoreOp::Adaptor transformed(operands);
-
-    Value dataPtr =
-        getStridedElementPtr(op.getLoc(), type, transformed.memref(),
-                             transformed.indices(), rewriter);
-    rewriter.replaceOpWithNewOp<LLVM::StoreOp>(op, transformed.value(),
-                                               dataPtr);
-    return success();
-  }
-};
-
-// The prefetch operation is lowered in a way similar to the load operation
-// except that the llvm.prefetch operation is used for replacement.
-struct PrefetchOpLowering : public LoadStoreOpLowering<memref::PrefetchOp> {
-  using Base::Base;
-
-  LogicalResult
-  matchAndRewrite(memref::PrefetchOp prefetchOp, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    memref::PrefetchOp::Adaptor transformed(operands);
-    auto type = prefetchOp.getMemRefType();
-    auto loc = prefetchOp.getLoc();
-
-    Value dataPtr = getStridedElementPtr(loc, type, transformed.memref(),
-                                         transformed.indices(), rewriter);
-
-    // Replace with llvm.prefetch.
-    auto llvmI32Type = typeConverter->convertType(rewriter.getIntegerType(32));
-    auto isWrite = rewriter.create<LLVM::ConstantOp>(
-        loc, llvmI32Type, rewriter.getI32IntegerAttr(prefetchOp.isWrite()));
-    auto localityHint = rewriter.create<LLVM::ConstantOp>(
-        loc, llvmI32Type,
-        rewriter.getI32IntegerAttr(prefetchOp.localityHint()));
-    auto isData = rewriter.create<LLVM::ConstantOp>(
-        loc, llvmI32Type, rewriter.getI32IntegerAttr(prefetchOp.isDataCache()));
-
-    rewriter.replaceOpWithNewOp<LLVM::Prefetch>(prefetchOp, dataPtr, isWrite,
-                                                localityHint, isData);
-    return success();
-  }
-};
-
 // The lowering of index_cast becomes an integer conversion since index becomes
 // an integer.  If the bit width of the source and target integer types is the
 // same, just erase the cast.  If the target type is wider, sign-extend the
@@ -2090,369 +1174,6 @@ struct SplatNdOpLowering : public ConvertOpToLLVMPattern<SplatOp> {
   }
 };
 
-/// Conversion pattern that transforms a subview op into:
-///   1. An `llvm.mlir.undef` operation to create a memref descriptor
-///   2. Updates to the descriptor to introduce the data ptr, offset, size
-///      and stride.
-/// The subview op is replaced by the descriptor.
-struct SubViewOpLowering : public ConvertOpToLLVMPattern<memref::SubViewOp> {
-  using ConvertOpToLLVMPattern<memref::SubViewOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(memref::SubViewOp subViewOp, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto loc = subViewOp.getLoc();
-
-    auto sourceMemRefType = subViewOp.source().getType().cast<MemRefType>();
-    auto sourceElementTy =
-        typeConverter->convertType(sourceMemRefType.getElementType());
-
-    auto viewMemRefType = subViewOp.getType();
-    auto inferredType = memref::SubViewOp::inferResultType(
-                            subViewOp.getSourceType(),
-                            extractFromI64ArrayAttr(subViewOp.static_offsets()),
-                            extractFromI64ArrayAttr(subViewOp.static_sizes()),
-                            extractFromI64ArrayAttr(subViewOp.static_strides()))
-                            .cast<MemRefType>();
-    auto targetElementTy =
-        typeConverter->convertType(viewMemRefType.getElementType());
-    auto targetDescTy = typeConverter->convertType(viewMemRefType);
-    if (!sourceElementTy || !targetDescTy || !targetElementTy ||
-        !LLVM::isCompatibleType(sourceElementTy) ||
-        !LLVM::isCompatibleType(targetElementTy) ||
-        !LLVM::isCompatibleType(targetDescTy))
-      return failure();
-
-    // Extract the offset and strides from the type.
-    int64_t offset;
-    SmallVector<int64_t, 4> strides;
-    auto successStrides = getStridesAndOffset(inferredType, strides, offset);
-    if (failed(successStrides))
-      return failure();
-
-    // Create the descriptor.
-    if (!LLVM::isCompatibleType(operands.front().getType()))
-      return failure();
-    MemRefDescriptor sourceMemRef(operands.front());
-    auto targetMemRef = MemRefDescriptor::undef(rewriter, loc, targetDescTy);
-
-    // Copy the buffer pointer from the old descriptor to the new one.
-    Value extracted = sourceMemRef.allocatedPtr(rewriter, loc);
-    Value bitcastPtr = rewriter.create<LLVM::BitcastOp>(
-        loc,
-        LLVM::LLVMPointerType::get(targetElementTy,
-                                   viewMemRefType.getMemorySpaceAsInt()),
-        extracted);
-    targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr);
-
-    // Copy the aligned pointer from the old descriptor to the new one.
-    extracted = sourceMemRef.alignedPtr(rewriter, loc);
-    bitcastPtr = rewriter.create<LLVM::BitcastOp>(
-        loc,
-        LLVM::LLVMPointerType::get(targetElementTy,
-                                   viewMemRefType.getMemorySpaceAsInt()),
-        extracted);
-    targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr);
-
-    auto shape = viewMemRefType.getShape();
-    auto inferredShape = inferredType.getShape();
-    size_t inferredShapeRank = inferredShape.size();
-    size_t resultShapeRank = shape.size();
-    llvm::SmallDenseSet<unsigned> unusedDims =
-        computeRankReductionMask(inferredShape, shape).getValue();
-
-    // Extract strides needed to compute offset.
-    SmallVector<Value, 4> strideValues;
-    strideValues.reserve(inferredShapeRank);
-    for (unsigned i = 0; i < inferredShapeRank; ++i)
-      strideValues.push_back(sourceMemRef.stride(rewriter, loc, i));
-
-    // Offset.
-    auto llvmIndexType = typeConverter->convertType(rewriter.getIndexType());
-    if (!ShapedType::isDynamicStrideOrOffset(offset)) {
-      targetMemRef.setConstantOffset(rewriter, loc, offset);
-    } else {
-      Value baseOffset = sourceMemRef.offset(rewriter, loc);
-      // `inferredShapeRank` may be larger than the number of offset operands
-      // because of trailing semantics. In this case, the offset is guaranteed
-      // to be interpreted as 0 and we can just skip the extra dimensions.
-      for (unsigned i = 0, e = std::min(inferredShapeRank,
-                                        subViewOp.getMixedOffsets().size());
-           i < e; ++i) {
-        Value offset =
-            // TODO: need OpFoldResult ODS adaptor to clean this up.
-            subViewOp.isDynamicOffset(i)
-                ? operands[subViewOp.getIndexOfDynamicOffset(i)]
-                : rewriter.create<LLVM::ConstantOp>(
-                      loc, llvmIndexType,
-                      rewriter.getI64IntegerAttr(subViewOp.getStaticOffset(i)));
-        Value mul = rewriter.create<LLVM::MulOp>(loc, offset, strideValues[i]);
-        baseOffset = rewriter.create<LLVM::AddOp>(loc, baseOffset, mul);
-      }
-      targetMemRef.setOffset(rewriter, loc, baseOffset);
-    }
-
-    // Update sizes and strides.
-    SmallVector<OpFoldResult> mixedSizes = subViewOp.getMixedSizes();
-    SmallVector<OpFoldResult> mixedStrides = subViewOp.getMixedStrides();
-    assert(mixedSizes.size() == mixedStrides.size() &&
-           "expected sizes and strides of equal length");
-    for (int i = inferredShapeRank - 1, j = resultShapeRank - 1;
-         i >= 0 && j >= 0; --i) {
-      if (unusedDims.contains(i))
-        continue;
-
-      // `i` may exceed the size of subViewOp.getMixedSizes() due to trailing semantics.
-      // In this case, the size is guaranteed to be interpreted as Dim and the
-      // stride as 1.
-      Value size, stride;
-      if (static_cast<unsigned>(i) >= mixedSizes.size()) {
-        size = rewriter.create<LLVM::DialectCastOp>(
-            loc, llvmIndexType,
-            rewriter.create<memref::DimOp>(loc, subViewOp.source(), i));
-        stride = rewriter.create<LLVM::ConstantOp>(
-            loc, llvmIndexType, rewriter.getI64IntegerAttr(1));
-      } else {
-        // TODO: need OpFoldResult ODS adaptor to clean this up.
-        size =
-            subViewOp.isDynamicSize(i)
-                ? operands[subViewOp.getIndexOfDynamicSize(i)]
-                : rewriter.create<LLVM::ConstantOp>(
-                      loc, llvmIndexType,
-                      rewriter.getI64IntegerAttr(subViewOp.getStaticSize(i)));
-        if (!ShapedType::isDynamicStrideOrOffset(strides[i])) {
-          stride = rewriter.create<LLVM::ConstantOp>(
-              loc, llvmIndexType, rewriter.getI64IntegerAttr(strides[i]));
-        } else {
-          stride = subViewOp.isDynamicStride(i)
-                       ? operands[subViewOp.getIndexOfDynamicStride(i)]
-                       : rewriter.create<LLVM::ConstantOp>(
-                             loc, llvmIndexType,
-                             rewriter.getI64IntegerAttr(
-                                 subViewOp.getStaticStride(i)));
-          stride = rewriter.create<LLVM::MulOp>(loc, stride, strideValues[i]);
-        }
-      }
-      targetMemRef.setSize(rewriter, loc, j, size);
-      targetMemRef.setStride(rewriter, loc, j, stride);
-      j--;
-    }
-
-    rewriter.replaceOp(subViewOp, {targetMemRef});
-    return success();
-  }
-};
-
-/// Conversion pattern that transforms a transpose op into:
-///   1. A function entry `alloca` operation to allocate a ViewDescriptor.
-///   2. A load of the ViewDescriptor from the pointer allocated in 1.
-///   3. Updates to the ViewDescriptor to introduce the data ptr, offset, size
-///      and stride. Size and stride are permutations of the original values.
-///   4. A store of the resulting ViewDescriptor to the alloca'ed pointer.
-/// The transpose op is replaced by the alloca'ed pointer.
-class TransposeOpLowering : public ConvertOpToLLVMPattern<memref::TransposeOp> {
-public:
-  using ConvertOpToLLVMPattern<memref::TransposeOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(memref::TransposeOp transposeOp, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto loc = transposeOp.getLoc();
-    memref::TransposeOpAdaptor adaptor(operands);
-    MemRefDescriptor viewMemRef(adaptor.in());
-
-    // No permutation, early exit.
-    if (transposeOp.permutation().isIdentity())
-      return rewriter.replaceOp(transposeOp, {viewMemRef}), success();
-
-    auto targetMemRef = MemRefDescriptor::undef(
-        rewriter, loc, typeConverter->convertType(transposeOp.getShapedType()));
-
-    // Copy the base and aligned pointers from the old descriptor to the new
-    // one.
-    targetMemRef.setAllocatedPtr(rewriter, loc,
-                                 viewMemRef.allocatedPtr(rewriter, loc));
-    targetMemRef.setAlignedPtr(rewriter, loc,
-                               viewMemRef.alignedPtr(rewriter, loc));
-
-    // Copy the offset pointer from the old descriptor to the new one.
-    targetMemRef.setOffset(rewriter, loc, viewMemRef.offset(rewriter, loc));
-
-    // Iterate over the dimensions and apply size/stride permutation.
-    for (auto en : llvm::enumerate(transposeOp.permutation().getResults())) {
-      int sourcePos = en.index();
-      int targetPos = en.value().cast<AffineDimExpr>().getPosition();
-      targetMemRef.setSize(rewriter, loc, targetPos,
-                           viewMemRef.size(rewriter, loc, sourcePos));
-      targetMemRef.setStride(rewriter, loc, targetPos,
-                             viewMemRef.stride(rewriter, loc, sourcePos));
-    }
-
-    rewriter.replaceOp(transposeOp, {targetMemRef});
-    return success();
-  }
-};
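
The loop above scatters each source size and stride to the position the permutation assigns it. Standalone, as a sketch:

#include <cstdint>

// out[perm[src]] = in[src], matching the setSize/setStride calls above.
void permuteDims(const int64_t *perm, const int64_t *in, int64_t *out,
                 int rank) {
  for (int src = 0; src < rank; ++src)
    out[perm[src]] = in[src];
}
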
-
-/// Conversion pattern that transforms an op into:
-///   1. An `llvm.mlir.undef` operation to create a memref descriptor
-///   2. Updates to the descriptor to introduce the data ptr, offset, size
-///      and stride.
-/// The view op is replaced by the descriptor.
-struct ViewOpLowering : public ConvertOpToLLVMPattern<memref::ViewOp> {
-  using ConvertOpToLLVMPattern<memref::ViewOp>::ConvertOpToLLVMPattern;
-
-  // Build and return the value for the idx^th shape dimension, either by
-  // returning the constant shape dimension or counting the proper dynamic size.
-  Value getSize(ConversionPatternRewriter &rewriter, Location loc,
-                ArrayRef<int64_t> shape, ValueRange dynamicSizes,
-                unsigned idx) const {
-    assert(idx < shape.size());
-    if (!ShapedType::isDynamic(shape[idx]))
-      return createIndexConstant(rewriter, loc, shape[idx]);
-    // Count the number of dynamic dims in range [0, idx]
-    unsigned nDynamic = llvm::count_if(shape.take_front(idx), [](int64_t v) {
-      return ShapedType::isDynamic(v);
-    });
-    return dynamicSizes[nDynamic];
-  }
-
-  // Build and return the idx^th stride, either by returning the constant stride
-  // or by computing the dynamic stride from the current `runningStride` and
-  // `nextSize`. The caller should keep a running stride and update it with the
-  // result returned by this function.
-  Value getStride(ConversionPatternRewriter &rewriter, Location loc,
-                  ArrayRef<int64_t> strides, Value nextSize,
-                  Value runningStride, unsigned idx) const {
-    assert(idx < strides.size());
-    if (!MemRefType::isDynamicStrideOrOffset(strides[idx]))
-      return createIndexConstant(rewriter, loc, strides[idx]);
-    if (nextSize)
-      return runningStride
-                 ? rewriter.create<LLVM::MulOp>(loc, runningStride, nextSize)
-                 : nextSize;
-    assert(!runningStride);
-    return createIndexConstant(rewriter, loc, 1);
-  }
-
-  LogicalResult
-  matchAndRewrite(memref::ViewOp viewOp, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto loc = viewOp.getLoc();
-    memref::ViewOpAdaptor adaptor(operands);
-
-    auto viewMemRefType = viewOp.getType();
-    auto targetElementTy =
-        typeConverter->convertType(viewMemRefType.getElementType());
-    auto targetDescTy = typeConverter->convertType(viewMemRefType);
-    if (!targetDescTy || !targetElementTy ||
-        !LLVM::isCompatibleType(targetElementTy) ||
-        !LLVM::isCompatibleType(targetDescTy))
-      return viewOp.emitWarning("Target descriptor type not converted to LLVM"),
-             failure();
-
-    int64_t offset;
-    SmallVector<int64_t, 4> strides;
-    auto successStrides = getStridesAndOffset(viewMemRefType, strides, offset);
-    if (failed(successStrides))
-      return viewOp.emitWarning("cannot cast to non-strided shape"), failure();
-    assert(offset == 0 && "expected offset to be 0");
-
-    // Create the descriptor.
-    MemRefDescriptor sourceMemRef(adaptor.source());
-    auto targetMemRef = MemRefDescriptor::undef(rewriter, loc, targetDescTy);
-
-    // Field 1: Copy the allocated pointer, used for malloc/free.
-    Value allocatedPtr = sourceMemRef.allocatedPtr(rewriter, loc);
-    auto srcMemRefType = viewOp.source().getType().cast<MemRefType>();
-    Value bitcastPtr = rewriter.create<LLVM::BitcastOp>(
-        loc,
-        LLVM::LLVMPointerType::get(targetElementTy,
-                                   srcMemRefType.getMemorySpaceAsInt()),
-        allocatedPtr);
-    targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr);
-
-    // Field 2: Copy the actual aligned pointer to payload.
-    Value alignedPtr = sourceMemRef.alignedPtr(rewriter, loc);
-    alignedPtr = rewriter.create<LLVM::GEPOp>(loc, alignedPtr.getType(),
-                                              alignedPtr, adaptor.byte_shift());
-    bitcastPtr = rewriter.create<LLVM::BitcastOp>(
-        loc,
-        LLVM::LLVMPointerType::get(targetElementTy,
-                                   srcMemRefType.getMemorySpaceAsInt()),
-        alignedPtr);
-    targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr);
-
-    // Field 3: The offset in the resulting type must be 0. This is because of
-    // the type change: an offset on srcType* may not be expressible as an
-    // offset on dstType*.
-    targetMemRef.setOffset(rewriter, loc,
-                           createIndexConstant(rewriter, loc, offset));
-
-    // Early exit for 0-D corner case.
-    if (viewMemRefType.getRank() == 0)
-      return rewriter.replaceOp(viewOp, {targetMemRef}), success();
-
-    // Fields 4 and 5: Update sizes and strides.
-    if (strides.back() != 1)
-      return viewOp.emitWarning("cannot cast to non-contiguous shape"),
-             failure();
-    Value stride = nullptr, nextSize = nullptr;
-    for (int i = viewMemRefType.getRank() - 1; i >= 0; --i) {
-      // Update size.
-      Value size =
-          getSize(rewriter, loc, viewMemRefType.getShape(), adaptor.sizes(), i);
-      targetMemRef.setSize(rewriter, loc, i, size);
-      // Update stride.
-      stride = getStride(rewriter, loc, strides, nextSize, stride, i);
-      targetMemRef.setStride(rewriter, loc, i, stride);
-      nextSize = size;
-    }
-
-    rewriter.replaceOp(viewOp, {targetMemRef});
-    return success();
-  }
-};
-
-struct AssumeAlignmentOpLowering
-    : public ConvertOpToLLVMPattern<memref::AssumeAlignmentOp> {
-  using ConvertOpToLLVMPattern<
-      memref::AssumeAlignmentOp>::ConvertOpToLLVMPattern;
-
-  LogicalResult
-  matchAndRewrite(memref::AssumeAlignmentOp op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    memref::AssumeAlignmentOp::Adaptor transformed(operands);
-    Value memref = transformed.memref();
-    unsigned alignment = op.alignment();
-    auto loc = op.getLoc();
-
-    MemRefDescriptor memRefDescriptor(memref);
-    Value ptr = memRefDescriptor.alignedPtr(rewriter, memref.getLoc());
-
-    // Emit llvm.assume(memref.alignedPtr & (alignment - 1) == 0). Notice that
-    // the asserted memref.alignedPtr isn't used anywhere else, as the real
-    // users like load/store/views always re-extract memref.alignedPtr as they
-    // get lowered.
-    //
-    // This relies on LLVM's CSE optimization (potentially after SROA), since
-    // after CSE all memref.alignedPtr instances get de-duplicated into the same
-    // pointer SSA value.
-    auto intPtrType =
-        getIntPtrType(memRefDescriptor.getElementPtrType().getAddressSpace());
-    Value zero = createIndexAttrConstant(rewriter, loc, intPtrType, 0);
-    Value mask =
-        createIndexAttrConstant(rewriter, loc, intPtrType, alignment - 1);
-    Value ptrValue = rewriter.create<LLVM::PtrToIntOp>(loc, intPtrType, ptr);
-    rewriter.create<LLVM::AssumeOp>(
-        loc, rewriter.create<LLVM::ICmpOp>(
-                 loc, LLVM::ICmpPredicate::eq,
-                 rewriter.create<LLVM::AndOp>(loc, ptrValue, mask), zero));
-
-    rewriter.eraseOp(op);
-    return success();
-  }
-};
-
 } // namespace
 
 /// Try to match the kind of a std.atomic_rmw to determine whether to use a
@@ -2627,17 +1348,22 @@ struct GenericAtomicRMWOpLowering
 
 } // namespace
 
-/// Collect a set of patterns to convert from the Standard dialect to LLVM.
-void mlir::populateStdToLLVMNonMemoryConversionPatterns(
+void mlir::populateStdToLLVMFuncOpConversionPattern(
     LLVMTypeConverter &converter, RewritePatternSet &patterns) {
-  // FIXME: this should be tablegen'ed
+  if (converter.getOptions().useBarePtrCallConv)
+    patterns.add<BarePtrFuncOpConversion>(converter);
+  else
+    patterns.add<FuncOpConversion>(converter);
+}
+
+void mlir::populateStdToLLVMConversionPatterns(LLVMTypeConverter &converter,
+                                               RewritePatternSet &patterns) {
+  populateStdToLLVMFuncOpConversionPattern(converter, patterns);
   // clang-format off
   patterns.add<
       AbsFOpLowering,
       AddFOpLowering,
       AddIOpLowering,
-      AllocaOpLowering,
-      AllocaScopeOpLowering,
       AndOpLowering,
       AssertOpLowering,
       AtomicRMWOpLowering,
@@ -2673,8 +1399,8 @@ void mlir::populateStdToLLVMNonMemoryConversionPatterns(
       NegFOpLowering,
       OrOpLowering,
       PowFOpLowering,
-      PrefetchOpLowering,
       RemFOpLowering,
+      RankOpLowering,
       ReturnOpLowering,
       RsqrtOpLowering,
       SIToFPOpLowering,
@@ -2701,47 +1427,6 @@ void mlir::populateStdToLLVMNonMemoryConversionPatterns(
   // clang-format on
 }
 
-void mlir::populateStdToLLVMMemoryConversionPatterns(
-    LLVMTypeConverter &converter, RewritePatternSet &patterns) {
-  // clang-format off
-  patterns.add<
-      AssumeAlignmentOpLowering,
-      DimOpLowering,
-      GlobalMemrefOpLowering,
-      GetGlobalMemrefOpLowering,
-      LoadOpLowering,
-      MemRefCastOpLowering,
-      MemRefCopyOpLowering,
-      MemRefReinterpretCastOpLowering,
-      MemRefReshapeOpLowering,
-      RankOpLowering,
-      StoreOpLowering,
-      SubViewOpLowering,
-      TransposeOpLowering,
-      ViewOpLowering>(converter);
-  // clang-format on
-  auto allocLowering = converter.getOptions().allocLowering;
-  if (allocLowering == LowerToLLVMOptions::AllocLowering::AlignedAlloc)
-    patterns.add<AlignedAllocOpLowering, DeallocOpLowering>(converter);
-  else if (allocLowering == LowerToLLVMOptions::AllocLowering::Malloc)
-    patterns.add<AllocOpLowering, DeallocOpLowering>(converter);
-}
-
-void mlir::populateStdToLLVMFuncOpConversionPattern(
-    LLVMTypeConverter &converter, RewritePatternSet &patterns) {
-  if (converter.getOptions().useBarePtrCallConv)
-    patterns.add<BarePtrFuncOpConversion>(converter);
-  else
-    patterns.add<FuncOpConversion>(converter);
-}
-
-void mlir::populateStdToLLVMConversionPatterns(LLVMTypeConverter &converter,
-                                               RewritePatternSet &patterns) {
-  populateStdToLLVMFuncOpConversionPattern(converter, patterns);
-  populateStdToLLVMNonMemoryConversionPatterns(converter, patterns);
-  populateStdToLLVMMemoryConversionPatterns(converter, patterns);
-}
-
 namespace {
 /// A pass converting MLIR operations into the LLVM IR dialect.
 struct LLVMLoweringPass : public ConvertStandardToLLVMBase<LLVMLoweringPass> {
@@ -2752,7 +1437,6 @@ struct LLVMLoweringPass : public ConvertStandardToLLVMBase<LLVMLoweringPass> {
     this->useBarePtrCallConv = useBarePtrCallConv;
     this->emitCWrappers = emitCWrappers;
     this->indexBitwidth = indexBitwidth;
-    this->useAlignedAlloc = useAlignedAlloc;
     this->dataLayout = dataLayout.getStringRepresentation();
   }
 
@@ -2782,9 +1466,6 @@ struct LLVMLoweringPass : public ConvertStandardToLLVMBase<LLVMLoweringPass> {
     options.emitCWrappers = emitCWrappers;
     if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
       options.overrideIndexBitwidth(indexBitwidth);
-    options.allocLowering =
-        (useAlignedAlloc ? LowerToLLVMOptions::AllocLowering::AlignedAlloc
-                         : LowerToLLVMOptions::AllocLowering::Malloc);
     options.dataLayout = llvm::DataLayout(this->dataLayout);
 
     LLVMTypeConverter typeConverter(&getContext(), options,
@@ -2802,54 +1483,6 @@ struct LLVMLoweringPass : public ConvertStandardToLLVMBase<LLVMLoweringPass> {
 };
 } // end namespace
 
-Value AllocLikeOpLLVMLowering::createAligned(
-    ConversionPatternRewriter &rewriter, Location loc, Value input,
-    Value alignment) {
-  Value one = createIndexAttrConstant(rewriter, loc, alignment.getType(), 1);
-  Value bump = rewriter.create<LLVM::SubOp>(loc, alignment, one);
-  Value bumped = rewriter.create<LLVM::AddOp>(loc, input, bump);
-  Value mod = rewriter.create<LLVM::URemOp>(loc, bumped, alignment);
-  return rewriter.create<LLVM::SubOp>(loc, bumped, mod);
-}
-
-LogicalResult AllocLikeOpLLVMLowering::matchAndRewrite(
-    Operation *op, ArrayRef<Value> operands,
-    ConversionPatternRewriter &rewriter) const {
-  MemRefType memRefType = getMemRefResultType(op);
-  if (!isConvertibleAndHasIdentityMaps(memRefType))
-    return rewriter.notifyMatchFailure(op, "incompatible memref type");
-  auto loc = op->getLoc();
-
-  // Get actual sizes of the memref as values: static sizes are constant
-  // values and dynamic sizes are passed to 'alloc' as operands.  In case of
-  // zero-dimensional memref, assume a scalar (size 1).
-  SmallVector<Value, 4> sizes;
-  SmallVector<Value, 4> strides;
-  Value sizeBytes;
-  this->getMemRefDescriptorSizes(loc, memRefType, operands, rewriter, sizes,
-                                 strides, sizeBytes);
-
-  // Allocate the underlying buffer.
-  Value allocatedPtr;
-  Value alignedPtr;
-  std::tie(allocatedPtr, alignedPtr) =
-      this->allocateBuffer(rewriter, loc, sizeBytes, op);
-
-  // Create the MemRef descriptor.
-  auto memRefDescriptor = this->createMemRefDescriptor(
-      loc, memRefType, allocatedPtr, alignedPtr, sizes, strides, rewriter);
-
-  // Return the final value of the descriptor.
-  rewriter.replaceOp(op, {memRefDescriptor});
-  return success();
-}
-
-mlir::LLVMConversionTarget::LLVMConversionTarget(MLIRContext &ctx)
-    : ConversionTarget(ctx) {
-  this->addLegalDialect<LLVM::LLVMDialect>();
-  this->addIllegalOp<LLVM::DialectCastOp>();
-}
-
 std::unique_ptr<OperationPass<ModuleOp>> mlir::createLowerToLLVMPass() {
   return std::make_unique<LLVMLoweringPass>();
 }
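
With the memref patterns split out, a downstream pass that previously relied on
populateStdToLLVMConversionPatterns pulling in the memory lowerings must now
request them explicitly, as the VectorToROCDL change below does. A minimal
sketch of the new wiring follows; the pass name and surrounding boilerplate are
placeholders for illustration, while the populate functions, headers, and
conversion-target types are the ones added or kept by this commit:

  #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
  #include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
  #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"

  // Hypothetical pass body showing the post-split pattern population.
  void MyLoweringPass::runOnOperation() {
    LLVMTypeConverter converter(&getContext());
    RewritePatternSet patterns(&getContext());

    // Memref lowerings now live in their own library; request them explicitly
    // alongside the remaining Std patterns.
    populateMemRefToLLVMConversionPatterns(converter, patterns);
    populateStdToLLVMConversionPatterns(converter, patterns);

    LLVMConversionTarget target(getContext());
    if (failed(applyPartialConversion(getOperation(), target,
                                      std::move(patterns))))
      signalPassFailure();
  }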

diff  --git a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt
index dfc4f7d9506ce..9a7c5aabc9a78 100644
--- a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt
@@ -18,9 +18,9 @@ add_mlir_conversion_library(MLIRVectorToLLVM
   MLIRArmSVETransforms
   MLIRAMX
   MLIRAMXTransforms
+  MLIRLLVMCommonConversion
   MLIRLLVMIR
   MLIRMemRef
-  MLIRStandardToLLVM
   MLIRTargetLLVMIRExport
   MLIRTransforms
   MLIRVector

diff  --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index 3662db6aa1d68..b1256cb4f6133 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -8,8 +8,7 @@
 
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
+#include "mlir/Conversion/LLVMCommon/VectorPattern.h"
 #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"

diff  --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
index ed154f9a3933f..bebbdc5b73a20 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
@@ -10,8 +10,8 @@
 
 #include "../PassDetail.h"
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Dialect/AMX/AMXDialect.h"
 #include "mlir/Dialect/AMX/Transforms.h"
 #include "mlir/Dialect/ArmNeon/ArmNeonDialect.h"

diff  --git a/mlir/lib/Conversion/VectorToROCDL/CMakeLists.txt b/mlir/lib/Conversion/VectorToROCDL/CMakeLists.txt
index 5cc1f5de2bf02..44ed37229210d 100644
--- a/mlir/lib/Conversion/VectorToROCDL/CMakeLists.txt
+++ b/mlir/lib/Conversion/VectorToROCDL/CMakeLists.txt
@@ -13,6 +13,8 @@ add_mlir_conversion_library(MLIRVectorToROCDL
 
   LINK_LIBS PUBLIC
   MLIRROCDLIR
+  MLIRLLVMCommonConversion
+  MLIRMemRefToLLVM
   MLIRStandardToLLVM
   MLIRVector
   MLIRTransforms

diff  --git a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp
index 2f033b18c8f11..9685faf649c72 100644
--- a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp
+++ b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp
@@ -14,8 +14,10 @@
 #include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"
 
 #include "../PassDetail.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
@@ -161,6 +163,7 @@ void LowerVectorToROCDLPass::runOnOperation() {
   RewritePatternSet patterns(&getContext());
 
   populateVectorToROCDLConversionPatterns(converter, patterns);
+  populateMemRefToLLVMConversionPatterns(converter, patterns);
   populateStdToLLVMConversionPatterns(converter, patterns);
 
   LLVMConversionTarget target(getContext());

diff  --git a/mlir/lib/Dialect/AMX/Transforms/CMakeLists.txt b/mlir/lib/Dialect/AMX/Transforms/CMakeLists.txt
index d7cf9e1e58cbe..eb863998d94c4 100644
--- a/mlir/lib/Dialect/AMX/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/AMX/Transforms/CMakeLists.txt
@@ -7,6 +7,6 @@ add_mlir_dialect_library(MLIRAMXTransforms
   LINK_LIBS PUBLIC
   MLIRAMX
   MLIRIR
+  MLIRLLVMCommonConversion
   MLIRLLVMIR
-  MLIRStandardToLLVM
   )

diff  --git a/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp
index d9cb5034a8dc0..6a942996a499e 100644
--- a/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp
+++ b/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp
@@ -8,7 +8,8 @@
 
 #include "mlir/Dialect/AMX/Transforms.h"
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/AMX/AMXDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"

diff  --git a/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt b/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
index f18962d0dc17b..8b2fe73252ef0 100644
--- a/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
@@ -7,6 +7,6 @@ add_mlir_dialect_library(MLIRArmSVETransforms
   LINK_LIBS PUBLIC
   MLIRArmSVE
   MLIRIR
+  MLIRLLVMCommonConversion
   MLIRLLVMIR
-  MLIRStandardToLLVM
   )

diff  --git a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
index ba84a955d66dd..ed50f458feacf 100644
--- a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
+++ b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
@@ -6,7 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
 #include "mlir/Dialect/ArmSVE/Transforms.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"

diff  --git a/mlir/lib/Dialect/X86Vector/Transforms/CMakeLists.txt b/mlir/lib/Dialect/X86Vector/Transforms/CMakeLists.txt
index 558cdaa4291a8..e33568188a76c 100644
--- a/mlir/lib/Dialect/X86Vector/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/X86Vector/Transforms/CMakeLists.txt
@@ -7,6 +7,6 @@ add_mlir_dialect_library(MLIRX86VectorTransforms
   LINK_LIBS PUBLIC
   MLIRX86Vector
   MLIRIR
+  MLIRLLVMCommonConversion
   MLIRLLVMIR
-  MLIRStandardToLLVM
   )

diff  --git a/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp
index ab9e6ff3177cd..6cd028a13db0c 100644
--- a/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp
+++ b/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp
@@ -8,7 +8,8 @@
 
 #include "mlir/Dialect/X86Vector/Transforms.h"
 
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/X86Vector/X86VectorDialect.h"

diff  --git a/mlir/test/Conversion/StandardToLLVM/convert-alloca-scope.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-alloca-scope.mlir
similarity index 77%
rename from mlir/test/Conversion/StandardToLLVM/convert-alloca-scope.mlir
rename to mlir/test/Conversion/MemRefToLLVM/convert-alloca-scope.mlir
index 4a16c6796f1e3..1dcf7acd14979 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-alloca-scope.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/convert-alloca-scope.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt -convert-std-to-llvm %s | FileCheck %s
+// RUN: mlir-opt -convert-memref-to-llvm %s | FileCheck %s
 
-// CHECK-LABEL: llvm.func @empty
+// CHECK-LABEL: @empty
 func @empty() {
   // CHECK: llvm.intr.stacksave 
   // CHECK: llvm.br
@@ -9,11 +9,10 @@ func @empty() {
   }
   // CHECK: llvm.intr.stackrestore 
   // CHECK: llvm.br
-  // CHECK: llvm.return
   return
 }
 
-// CHECK-LABEL: llvm.func @returns_nothing
+// CHECK-LABEL: @returns_nothing
 func @returns_nothing(%b: f32) {
   %a = constant 10.0 : f32
   // CHECK: llvm.intr.stacksave 
@@ -22,11 +21,10 @@ func @returns_nothing(%b: f32) {
     memref.alloca_scope.return
   }
   // CHECK: llvm.intr.stackrestore 
-  // CHECK: llvm.return
   return
 }
 
-// CHECK-LABEL: llvm.func @returns_one_value
+// CHECK-LABEL: @returns_one_value
 func @returns_one_value(%b: f32) -> f32 {
   %a = constant 10.0 : f32
   // CHECK: llvm.intr.stacksave 
@@ -35,11 +33,10 @@ func @returns_one_value(%b: f32) -> f32 {
     memref.alloca_scope.return %c: f32
   }
   // CHECK: llvm.intr.stackrestore 
-  // CHECK: llvm.return
   return %result : f32
 }
 
-// CHECK-LABEL: llvm.func @returns_multiple_values
+// CHECK-LABEL: @returns_multiple_values
 func @returns_multiple_values(%b: f32) -> f32 {
   %a = constant 10.0 : f32
   // CHECK: llvm.intr.stacksave 
@@ -49,7 +46,6 @@ func @returns_multiple_values(%b: f32) -> f32 {
     memref.alloca_scope.return %c, %d: f32, f32
   }
   // CHECK: llvm.intr.stackrestore 
-  // CHECK: llvm.return
   %result = std.addf %result1, %result2 : f32
   return %result : f32
 }
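
The dropped `llvm.return` checks reflect that this test now runs only the
MemRef-to-LLVM pass, which leaves std functions and returns untouched; what
remains checked is the stacksave/stackrestore bracketing emitted for
memref.alloca_scope. In C++ terms the lowering amounts to the scoped-stack
idiom sketched below; the two declarations are hypothetical stand-ins for the
LLVM intrinsics, not a real API:

  // Hypothetical stand-ins for llvm.intr.stacksave / llvm.intr.stackrestore.
  extern "C" void *stacksave();            // returns the current stack pointer
  extern "C" void stackrestore(void *sp);  // resets it, freeing later allocas

  void allocaScopeAnalogue() {
    void *sp = stacksave();  // on scope entry
    // ... region body: stack allocations made here ...
    stackrestore(sp);        // on scope exit
  }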

diff  --git a/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
similarity index 83%
rename from mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir
rename to mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
index e66810c3add57..649f2bf13f6f9 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
@@ -1,33 +1,11 @@
-// RUN: mlir-opt -split-input-file -convert-std-to-llvm %s | FileCheck %s
-// RUN: mlir-opt -split-input-file -convert-std-to-llvm='use-aligned-alloc=1' %s | FileCheck %s --check-prefix=ALIGNED-ALLOC
-
-// CHECK-LABEL: func @check_strided_memref_arguments(
-// CHECK-COUNT-2: !llvm.ptr<f32>
-// CHECK-COUNT-5: i64
-// CHECK-COUNT-2: !llvm.ptr<f32>
-// CHECK-COUNT-5: i64
-// CHECK-COUNT-2: !llvm.ptr<f32>
-// CHECK-COUNT-5: i64
-func @check_strided_memref_arguments(%static: memref<10x20xf32, affine_map<(i,j)->(20 * i + j + 1)>>,
-                                     %dynamic : memref<?x?xf32, affine_map<(i,j)[M]->(M * i + j + 1)>>,
-                                     %mixed : memref<10x?xf32, affine_map<(i,j)[M]->(M * i + j + 1)>>) {
-  return
-}
-
-// CHECK-LABEL: func @check_arguments
-// CHECK-COUNT-2: !llvm.ptr<f32>
-// CHECK-COUNT-5: i64
-// CHECK-COUNT-2: !llvm.ptr<f32>
-// CHECK-COUNT-5: i64
-// CHECK-COUNT-2: !llvm.ptr<f32>
-// CHECK-COUNT-5: i64
-func @check_arguments(%static: memref<10x20xf32>, %dynamic : memref<?x?xf32>, %mixed : memref<10x?xf32>) {
-  return
-}
+// RUN: mlir-opt -split-input-file -convert-memref-to-llvm %s | FileCheck %s
+// RUN: mlir-opt -split-input-file -convert-memref-to-llvm='use-aligned-alloc=1' %s | FileCheck %s --check-prefix=ALIGNED-ALLOC
 
 // CHECK-LABEL: func @mixed_alloc(
-//       CHECK:   %[[M:.*]]: i64, %[[N:.*]]: i64) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)> {
+//       CHECK:   %[[Marg:.*]]: index, %[[Narg:.*]]: index)
 func @mixed_alloc(%arg0: index, %arg1: index) -> memref<?x42x?xf32> {
+//       CHECK:  %[[M:.*]] = llvm.mlir.cast %[[Marg]]
+//       CHECK:  %[[N:.*]] = llvm.mlir.cast %[[Narg]]
 //       CHECK:  %[[c42:.*]] = llvm.mlir.constant(42 : index) : i64
 //  CHECK-NEXT:  %[[one:.*]] = llvm.mlir.constant(1 : index) : i64
 //  CHECK-NEXT:  %[[st0:.*]] = llvm.mul %[[N]], %[[c42]] : i64
@@ -49,23 +27,27 @@ func @mixed_alloc(%arg0: index, %arg1: index) -> memref<?x42x?xf32> {
 //  CHECK-NEXT:  llvm.insertvalue %[[N]], %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
 //  CHECK-NEXT:  llvm.insertvalue %[[one]], %{{.*}}[4, 2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
   %0 = memref.alloc(%arg0, %arg1) : memref<?x42x?xf32>
-//  CHECK-NEXT:  llvm.return %{{.*}} : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
   return %0 : memref<?x42x?xf32>
 }
 
+// -----
+
 // CHECK-LABEL: func @mixed_dealloc
 func @mixed_dealloc(%arg0: memref<?x42x?xf32>) {
 //      CHECK:  %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
 // CHECK-NEXT:  %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
 // CHECK-NEXT:  llvm.call @free(%[[ptri8]]) : (!llvm.ptr<i8>) -> ()
   memref.dealloc %arg0 : memref<?x42x?xf32>
-// CHECK-NEXT:  llvm.return
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @dynamic_alloc(
-//       CHECK:   %[[M:.*]]: i64, %[[N:.*]]: i64) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> {
+//       CHECK:   %[[Marg:.*]]: index, %[[Narg:.*]]: index)
 func @dynamic_alloc(%arg0: index, %arg1: index) -> memref<?x?xf32> {
+//       CHECK:  %[[M:.*]] = llvm.mlir.cast %[[Marg]]
+//       CHECK:  %[[N:.*]] = llvm.mlir.cast %[[Narg]]
 //  CHECK-NEXT:  %[[one:.*]] = llvm.mlir.constant(1 : index) : i64
 //  CHECK-NEXT:  %[[sz:.*]] = llvm.mul %[[N]], %[[M]] : i64
 //  CHECK-NEXT:  %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
@@ -83,15 +65,16 @@ func @dynamic_alloc(%arg0: index, %arg1: index) -> memref<?x?xf32> {
 //  CHECK-NEXT:  llvm.insertvalue %[[N]], %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 //  CHECK-NEXT:  llvm.insertvalue %[[one]], %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
   %0 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
-//  CHECK-NEXT:  llvm.return %{{.*}} : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
   return %0 : memref<?x?xf32>
 }
 
 // -----
 
 // CHECK-LABEL: func @dynamic_alloca
-// CHECK: %[[M:.*]]: i64, %[[N:.*]]: i64) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> {
+// CHECK: %[[Marg:.*]]: index, %[[Narg:.*]]: index)
 func @dynamic_alloca(%arg0: index, %arg1: index) -> memref<?x?xf32> {
+//       CHECK:  %[[M:.*]] = llvm.mlir.cast %[[Marg]]
+//       CHECK:  %[[N:.*]] = llvm.mlir.cast %[[Narg]]
 //  CHECK-NEXT:  %[[st1:.*]] = llvm.mlir.constant(1 : index) : i64
 //  CHECK-NEXT:  %[[num_elems:.*]] = llvm.mul %[[N]], %[[M]] : i64
 //  CHECK-NEXT:  %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
@@ -120,6 +103,8 @@ func @dynamic_alloca(%arg0: index, %arg1: index) -> memref<?x?xf32> {
   return %0 : memref<?x?xf32>
 }
 
+// -----
+
 // CHECK-LABEL: func @dynamic_dealloc
 func @dynamic_dealloc(%arg0: memref<?x?xf32>) {
 //      CHECK:  %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
@@ -129,8 +114,10 @@ func @dynamic_dealloc(%arg0: memref<?x?xf32>) {
   return
 }
 
-// CHECK-LABEL: func @stdlib_aligned_alloc({{.*}}) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> {
-// ALIGNED-ALLOC-LABEL: func @stdlib_aligned_alloc({{.*}}) -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> {
+// -----
+
+// CHECK-LABEL: func @stdlib_aligned_alloc({{.*}})
+// ALIGNED-ALLOC-LABEL: func @stdlib_aligned_alloc({{.*}})
 func @stdlib_aligned_alloc(%N : index) -> memref<32x18xf32> {
 // ALIGNED-ALLOC-NEXT:  %[[sz1:.*]] = llvm.mlir.constant(32 : index) : i64
 // ALIGNED-ALLOC-NEXT:  %[[sz2:.*]] = llvm.mlir.constant(18 : index) : i64
@@ -175,12 +162,13 @@ func @stdlib_aligned_alloc(%N : index) -> memref<32x18xf32> {
   return %0 : memref<32x18xf32>
 }
 
+// -----
+
 // CHECK-LABEL: func @mixed_load(
-// CHECK-COUNT-2: !llvm.ptr<f32>,
-// CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: i64
-// CHECK:         %[[I:.*]]: i64,
-// CHECK:         %[[J:.*]]: i64)
+// CHECK:         %{{.*}}, %[[Iarg:.*]]: index, %[[Jarg:.*]]: index)
 func @mixed_load(%mixed : memref<42x?xf32>, %i : index, %j : index) {
+//       CHECK:  %[[I:.*]] = llvm.mlir.cast %[[Iarg]]
+//       CHECK:  %[[J:.*]] = llvm.mlir.cast %[[Jarg]]
 //       CHECK:  %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 //  CHECK-NEXT:  %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 //  CHECK-NEXT:  %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : i64
@@ -191,17 +179,13 @@ func @mixed_load(%mixed : memref<42x?xf32>, %i : index, %j : index) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @dynamic_load(
-// CHECK-SAME:         %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<f32>
-// CHECK-SAME:         %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<f32>
-// CHECK-SAME:         %[[ARG2:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG3:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG4:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG5:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG6:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[I:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[J:[a-zA-Z0-9]*]]: i64
+// CHECK:         %{{.*}}, %[[Iarg:.*]]: index, %[[Jarg:.*]]: index)
 func @dynamic_load(%dynamic : memref<?x?xf32>, %i : index, %j : index) {
+//       CHECK:  %[[I:.*]] = llvm.mlir.cast %[[Iarg]]
+//       CHECK:  %[[J:.*]] = llvm.mlir.cast %[[Jarg]]
 //       CHECK:  %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 //  CHECK-NEXT:  %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 //  CHECK-NEXT:  %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : i64
@@ -212,17 +196,13 @@ func @dynamic_load(%dynamic : memref<?x?xf32>, %i : index, %j : index) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @prefetch
-// CHECK-SAME:         %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<f32>
-// CHECK-SAME:         %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<f32>
-// CHECK-SAME:         %[[ARG2:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG3:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG4:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG5:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG6:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[I:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[J:[a-zA-Z0-9]*]]: i64
+// CHECK:         %{{.*}}, %[[Iarg:.*]]: index, %[[Jarg:.*]]: index)
 func @prefetch(%A : memref<?x?xf32>, %i : index, %j : index) {
+//      CHECK:  %[[I:.*]] = llvm.mlir.cast %[[Iarg]]
+//      CHECK:  %[[J:.*]] = llvm.mlir.cast %[[Jarg]]
 //      CHECK:  %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK-NEXT:  %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK-NEXT:  %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : i64
@@ -246,17 +226,13 @@ func @prefetch(%A : memref<?x?xf32>, %i : index, %j : index) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @dynamic_store
-// CHECK-SAME:         %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<f32>
-// CHECK-SAME:         %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<f32>
-// CHECK-SAME:         %[[ARG2:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG3:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG4:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG5:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG6:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[I:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[J:[a-zA-Z0-9]*]]: i64
+// CHECK:         %{{.*}}, %[[Iarg:.*]]: index, %[[Jarg:.*]]: index
 func @dynamic_store(%dynamic : memref<?x?xf32>, %i : index, %j : index, %val : f32) {
+//       CHECK:  %[[I:.*]] = llvm.mlir.cast %[[Iarg]]
+//       CHECK:  %[[J:.*]] = llvm.mlir.cast %[[Jarg]]
 //       CHECK:  %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 //  CHECK-NEXT:  %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 //  CHECK-NEXT:  %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : i64
@@ -267,17 +243,13 @@ func @dynamic_store(%dynamic : memref<?x?xf32>, %i : index, %j : index, %val : f
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @mixed_store
-// CHECK-SAME:         %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<f32>
-// CHECK-SAME:         %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<f32>
-// CHECK-SAME:         %[[ARG2:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG3:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG4:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG5:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG6:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[I:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[J:[a-zA-Z0-9]*]]: i64
+// CHECK:         %{{.*}}, %[[Iarg:.*]]: index, %[[Jarg:.*]]: index
 func @mixed_store(%mixed : memref<42x?xf32>, %i : index, %j : index, %val : f32) {
+//       CHECK:  %[[I:.*]] = llvm.mlir.cast %[[Iarg]]
+//       CHECK:  %[[J:.*]] = llvm.mlir.cast %[[Jarg]]
 //       CHECK:  %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 //  CHECK-NEXT:  %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 //  CHECK-NEXT:  %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : i64
@@ -288,6 +260,8 @@ func @mixed_store(%mixed : memref<42x?xf32>, %i : index, %j : index, %val : f32)
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @memref_cast_static_to_dynamic
 func @memref_cast_static_to_dynamic(%static : memref<10x42xf32>) {
 // CHECK-NOT: llvm.bitcast
@@ -295,6 +269,8 @@ func @memref_cast_static_to_dynamic(%static : memref<10x42xf32>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @memref_cast_static_to_mixed
 func @memref_cast_static_to_mixed(%static : memref<10x42xf32>) {
 // CHECK-NOT: llvm.bitcast
@@ -302,6 +278,8 @@ func @memref_cast_static_to_mixed(%static : memref<10x42xf32>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @memref_cast_dynamic_to_static
 func @memref_cast_dynamic_to_static(%dynamic : memref<?x?xf32>) {
 // CHECK-NOT: llvm.bitcast
@@ -309,6 +287,8 @@ func @memref_cast_dynamic_to_static(%dynamic : memref<?x?xf32>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @memref_cast_dynamic_to_mixed
 func @memref_cast_dynamic_to_mixed(%dynamic : memref<?x?xf32>) {
 // CHECK-NOT: llvm.bitcast
@@ -316,6 +296,8 @@ func @memref_cast_dynamic_to_mixed(%dynamic : memref<?x?xf32>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @memref_cast_mixed_to_dynamic
 func @memref_cast_mixed_to_dynamic(%mixed : memref<42x?xf32>) {
 // CHECK-NOT: llvm.bitcast
@@ -323,6 +305,8 @@ func @memref_cast_mixed_to_dynamic(%mixed : memref<42x?xf32>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @memref_cast_mixed_to_static
 func @memref_cast_mixed_to_static(%mixed : memref<42x?xf32>) {
 // CHECK-NOT: llvm.bitcast
@@ -330,6 +314,8 @@ func @memref_cast_mixed_to_static(%mixed : memref<42x?xf32>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @memref_cast_mixed_to_mixed
 func @memref_cast_mixed_to_mixed(%mixed : memref<42x?xf32>) {
 // CHECK-NOT: llvm.bitcast
@@ -337,6 +323,8 @@ func @memref_cast_mixed_to_mixed(%mixed : memref<42x?xf32>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @memref_cast_ranked_to_unranked
 func @memref_cast_ranked_to_unranked(%arg : memref<42x2x?xf32>) {
 // CHECK-DAG:  %[[c:.*]] = llvm.mlir.constant(1 : index) : i64
@@ -351,6 +339,8 @@ func @memref_cast_ranked_to_unranked(%arg : memref<42x2x?xf32>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @memref_cast_unranked_to_ranked
 func @memref_cast_unranked_to_ranked(%arg : memref<*xf32>) {
 //      CHECK: %[[p:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(i64, ptr<i8>)>
@@ -359,49 +349,47 @@ func @memref_cast_unranked_to_ranked(%arg : memref<*xf32>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func @mixed_memref_dim
 func @mixed_memref_dim(%mixed : memref<42x?x?x13x?xf32>) {
 // CHECK: llvm.mlir.constant(42 : index) : i64
   %c0 = constant 0 : index
   %0 = memref.dim %mixed, %c0 : memref<42x?x?x13x?xf32>
-// CHECK: llvm.extractvalue %[[ld:.*]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<5 x i64>, array<5 x i64>)>
+// CHECK: llvm.extractvalue %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<5 x i64>, array<5 x i64>)>
   %c1 = constant 1 : index
   %1 = memref.dim %mixed, %c1 : memref<42x?x?x13x?xf32>
-// CHECK: llvm.extractvalue %[[ld]][3, 2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<5 x i64>, array<5 x i64>)>
+// CHECK: llvm.extractvalue %{{.*}}[3, 2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<5 x i64>, array<5 x i64>)>
   %c2 = constant 2 : index
   %2 = memref.dim %mixed, %c2 : memref<42x?x?x13x?xf32>
 // CHECK: llvm.mlir.constant(13 : index) : i64
   %c3 = constant 3 : index
   %3 = memref.dim %mixed, %c3 : memref<42x?x?x13x?xf32>
-// CHECK: llvm.extractvalue %[[ld]][3, 4] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<5 x i64>, array<5 x i64>)>
+// CHECK: llvm.extractvalue %{{.*}}[3, 4] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<5 x i64>, array<5 x i64>)>
   %c4 = constant 4 : index
   %4 = memref.dim %mixed, %c4 : memref<42x?x?x13x?xf32>
   return
 }
 
+// -----
+
 // CHECK-LABEL: @memref_dim_with_dyn_index
-// CHECK-SAME: %[[ALLOC_PTR:.*]]: !llvm.ptr<f32>, %[[ALIGN_PTR:.*]]: !llvm.ptr<f32>, %[[OFFSET:.*]]: i64, %[[SIZE0:.*]]: i64, %[[SIZE1:.*]]: i64, %[[STRIDE0:.*]]: i64, %[[STRIDE1:.*]]: i64, %[[IDX:.*]]: i64) -> i64
+// CHECK: %{{.*}}, %[[IDXarg:.*]]: index
 func @memref_dim_with_dyn_index(%arg : memref<3x?xf32>, %idx : index) -> index {
-  // CHECK-NEXT: %[[DESCR0:.*]] = llvm.mlir.undef : [[DESCR_TY:!llvm.struct<\(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>\)>]]
-  // CHECK-NEXT: %[[DESCR1:.*]] = llvm.insertvalue %[[ALLOC_PTR]], %[[DESCR0]][0] : [[DESCR_TY]]
-  // CHECK-NEXT: %[[DESCR2:.*]] = llvm.insertvalue %[[ALIGN_PTR]], %[[DESCR1]][1] : [[DESCR_TY]]
-  // CHECK-NEXT: %[[DESCR3:.*]] = llvm.insertvalue %[[OFFSET]],    %[[DESCR2]][2] : [[DESCR_TY]]
-  // CHECK-NEXT: %[[DESCR4:.*]] = llvm.insertvalue %[[SIZE0]],     %[[DESCR3]][3, 0] : [[DESCR_TY]]
-  // CHECK-NEXT: %[[DESCR5:.*]] = llvm.insertvalue %[[STRIDE0]],   %[[DESCR4]][4, 0] : [[DESCR_TY]]
-  // CHECK-NEXT: %[[DESCR6:.*]] = llvm.insertvalue %[[SIZE1]],     %[[DESCR5]][3, 1] : [[DESCR_TY]]
-  // CHECK-NEXT: %[[DESCR7:.*]] = llvm.insertvalue %[[STRIDE1]],   %[[DESCR6]][4, 1] : [[DESCR_TY]]
   // CHECK-DAG: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
   // CHECK-DAG: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
-  // CHECK-DAG: %[[SIZES:.*]] = llvm.extractvalue %[[DESCR7]][3] : [[DESCR_TY]]
+  // CHECK-DAG: %[[SIZES:.*]] = llvm.extractvalue %{{.*}}[3] : ![[DESCR_TY:.*]]
   // CHECK-DAG: %[[SIZES_PTR:.*]] = llvm.alloca %[[C1]] x !llvm.array<2 x i64> : (i64) -> !llvm.ptr<array<2 x i64>>
   // CHECK-DAG: llvm.store %[[SIZES]], %[[SIZES_PTR]] : !llvm.ptr<array<2 x i64>>
+  // CHECK-DAG: %[[IDX:.*]] = llvm.mlir.cast %[[IDXarg]]
   // CHECK-DAG: %[[RESULT_PTR:.*]] = llvm.getelementptr %[[SIZES_PTR]][%[[C0]], %[[IDX]]] : (!llvm.ptr<array<2 x i64>>, i64, i64) -> !llvm.ptr<i64>
   // CHECK-DAG: %[[RESULT:.*]] = llvm.load %[[RESULT_PTR]] : !llvm.ptr<i64>
-  // CHECK-DAG: llvm.return %[[RESULT]] : i64
   %result = memref.dim %arg, %idx : memref<3x?xf32>
   return %result : index
 }
 
+// -----
+
 // CHECK-LABEL: @memref_reinterpret_cast_ranked_to_static_shape
 func @memref_reinterpret_cast_ranked_to_static_shape(%input : memref<2x3xf32>) {
   %output = memref.reinterpret_cast %input to
@@ -409,7 +397,8 @@ func @memref_reinterpret_cast_ranked_to_static_shape(%input : memref<2x3xf32>) {
            : memref<2x3xf32> to memref<6x1xf32>
   return
 }
-// CHECK: [[INPUT:%.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : [[TY:!.*]]
+// CHECK: [[INPUT:%.*]] = llvm.mlir.cast %{{.*}} :
+// CHECK: to [[TY:!.*]]
 // CHECK: [[OUT_0:%.*]] = llvm.mlir.undef : [[TY]]
 // CHECK: [[BASE_PTR:%.*]] = llvm.extractvalue [[INPUT]][0] : [[TY]]
 // CHECK: [[ALIGNED_PTR:%.*]] = llvm.extractvalue [[INPUT]][1] : [[TY]]
@@ -426,6 +415,8 @@ func @memref_reinterpret_cast_ranked_to_static_shape(%input : memref<2x3xf32>) {
 // CHECK: [[STRIDE_1:%.*]] = llvm.mlir.constant(1 : index) : i64
 // CHECK: [[OUT_7:%.*]] = llvm.insertvalue [[STRIDE_1]], [[OUT_6]][4, 1] : [[TY]]
 
+// -----
+
 // CHECK-LABEL: @memref_reinterpret_cast_unranked_to_dynamic_shape
 func @memref_reinterpret_cast_unranked_to_dynamic_shape(%offset: index,
                                                         %size_0 : index,
@@ -439,10 +430,15 @@ func @memref_reinterpret_cast_unranked_to_dynamic_shape(%offset: index,
            : memref<*xf32> to memref<?x?xf32, offset: ?, strides: [?, ?]>
   return
 }
-// CHECK-SAME: ([[OFFSET:%[a-z,0-9]+]]: i64,
-// CHECK-SAME: [[SIZE_0:%[a-z,0-9]+]]: i64, [[SIZE_1:%[a-z,0-9]+]]: i64,
-// CHECK-SAME: [[STRIDE_0:%[a-z,0-9]+]]: i64, [[STRIDE_1:%[a-z,0-9]+]]: i64,
-// CHECK: [[INPUT:%.*]] = llvm.insertvalue {{.*}}[1] : !llvm.struct<(i64, ptr<i8>)>
+// CHECK-SAME: ([[OFFSETarg:%[a-z,0-9]+]]: index,
+// CHECK-SAME: [[SIZE_0arg:%[a-z,0-9]+]]: index, [[SIZE_1arg:%[a-z,0-9]+]]: index,
+// CHECK-SAME: [[STRIDE_0arg:%[a-z,0-9]+]]: index, [[STRIDE_1arg:%[a-z,0-9]+]]: index,
+// CHECK: [[INPUT:%.*]] = llvm.mlir.cast
+// CHECK: [[OFFSET:%.*]] = llvm.mlir.cast [[OFFSETarg]]
+// CHECK: [[SIZE_0:%.*]] = llvm.mlir.cast [[SIZE_0arg]]
+// CHECK: [[SIZE_1:%.*]] = llvm.mlir.cast [[SIZE_1arg]]
+// CHECK: [[STRIDE_0:%.*]] = llvm.mlir.cast [[STRIDE_0arg]]
+// CHECK: [[STRIDE_1:%.*]] = llvm.mlir.cast [[STRIDE_1arg]]
 // CHECK: [[OUT_0:%.*]] = llvm.mlir.undef : [[TY:!.*]]
 // CHECK: [[DESCRIPTOR:%.*]] = llvm.extractvalue [[INPUT]][1] : !llvm.struct<(i64, ptr<i8>)>
 // CHECK: [[BASE_PTR_PTR:%.*]] = llvm.bitcast [[DESCRIPTOR]] : !llvm.ptr<i8> to !llvm.ptr<ptr<f32>>
@@ -460,14 +456,16 @@ func @memref_reinterpret_cast_unranked_to_dynamic_shape(%offset: index,
 // CHECK: [[OUT_6:%.*]] = llvm.insertvalue [[SIZE_1]], [[OUT_5]][3, 1] : [[TY]]
 // CHECK: [[OUT_7:%.*]] = llvm.insertvalue [[STRIDE_1]], [[OUT_6]][4, 1] : [[TY]]
 
+// -----
+
 // CHECK-LABEL: @memref_reshape
 func @memref_reshape(%input : memref<2x3xf32>, %shape : memref<?xindex>) {
   %output = memref.reshape %input(%shape)
                 : (memref<2x3xf32>, memref<?xindex>) -> memref<*xf32>
   return
 }
-// CHECK: [[INPUT:%.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : [[INPUT_TY:!.*]]
-// CHECK: [[SHAPE:%.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : [[SHAPE_TY:!.*]]
+// CHECK: [[INPUT:%.*]] = llvm.mlir.cast %{{.*}} to [[INPUT_TY:!.*]]
+// CHECK: [[SHAPE:%.*]] = llvm.mlir.cast %{{.*}} to [[SHAPE_TY:!.*]]
 // CHECK: [[RANK:%.*]] = llvm.extractvalue [[SHAPE]][3, 0] : [[SHAPE_TY]]
 // CHECK: [[UNRANKED_OUT_O:%.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
 // CHECK: [[UNRANKED_OUT_1:%.*]] = llvm.insertvalue [[RANK]], [[UNRANKED_OUT_O]][0] : !llvm.struct<(i64, ptr<i8>)>
@@ -528,7 +526,7 @@ func @memref_reshape(%input : memref<2x3xf32>, %shape : memref<?xindex>) {
 // CHECK:   llvm.br ^bb1([[STRIDE_COND]], [[UPDATE_STRIDE]] : i64, i64)
 
 // CHECK: ^bb3:
-// CHECK:   llvm.return
+// CHECK:   return
 
 // -----
 

diff  --git a/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir
new file mode 100644
index 0000000000000..2846f77961b2c
--- /dev/null
+++ b/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir
@@ -0,0 +1,219 @@
+// RUN: mlir-opt -convert-memref-to-llvm -split-input-file %s | FileCheck %s
+
+// CHECK-LABEL: func @zero_d_alloc()
+func @zero_d_alloc() -> memref<f32> {
+// CHECK: %[[one:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK: %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
+// CHECK: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+// CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
+// CHECK: llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr<i8>
+// CHECK: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<f32>
+// CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+// CHECK: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+// CHECK: llvm.insertvalue %[[ptr]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+// CHECK: llvm.mlir.cast %{{.*}}
+
+  %0 = memref.alloc() : memref<f32>
+  return %0 : memref<f32>
+}
+
+// -----
+
+// CHECK-LABEL: func @zero_d_dealloc
+func @zero_d_dealloc(%arg0: memref<f32>) {
+// CHECK: llvm.mlir.cast
+// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+// CHECK: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
+// CHECK: llvm.call @free(%[[bc]]) : (!llvm.ptr<i8>) -> ()
+
+  memref.dealloc %arg0 : memref<f32>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @aligned_1d_alloc(
+func @aligned_1d_alloc() -> memref<42xf32> {
+// CHECK: %[[sz1:.*]] = llvm.mlir.constant(42 : index) : i64
+// CHECK: %[[st1:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK: %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
+// CHECK: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[sz1]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+// CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
+// CHECK: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : i64
+// CHECK: %[[allocsize:.*]] = llvm.add %[[size_bytes]], %[[alignment]] : i64
+// CHECK: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (i64) -> !llvm.ptr<i8>
+// CHECK: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<f32>
+// CHECK: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[ptr]] : !llvm.ptr<f32> to i64
+// CHECK: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK: %[[bump:.*]] = llvm.sub %[[alignment]], %[[one_1]] : i64
+// CHECK: %[[bumped:.*]] = llvm.add %[[allocatedAsInt]], %[[bump]] : i64
+// CHECK: %[[mod:.*]] = llvm.urem %[[bumped]], %[[alignment]] : i64
+// CHECK: %[[aligned:.*]] = llvm.sub %[[bumped]], %[[mod]] : i64
+// CHECK: %[[alignedBitCast:.*]] = llvm.inttoptr %[[aligned]] : i64 to !llvm.ptr<f32>
+// CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK: llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  %0 = memref.alloc() {alignment = 8} : memref<42xf32>
+  return %0 : memref<42xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @static_alloc()
+func @static_alloc() -> memref<32x18xf32> {
+// CHECK: %[[num_elems:.*]] = llvm.mlir.constant(576 : index) : i64
+// CHECK: %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
+// CHECK: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+// CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
+// CHECK: %[[allocated:.*]] = llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr<i8>
+// CHECK: llvm.bitcast %[[allocated]] : !llvm.ptr<i8> to !llvm.ptr<f32>
+ %0 = memref.alloc() : memref<32x18xf32>
+ return %0 : memref<32x18xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @static_alloca()
+func @static_alloca() -> memref<32x18xf32> {
+// CHECK: %[[sz1:.*]] = llvm.mlir.constant(32 : index) : i64
+// CHECK: %[[sz2:.*]] = llvm.mlir.constant(18 : index) : i64
+// CHECK: %[[st2:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK: %[[num_elems:.*]] = llvm.mlir.constant(576 : index) : i64
+// CHECK: %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
+// CHECK: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+// CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
+// CHECK: %[[allocated:.*]] = llvm.alloca %[[size_bytes]] x f32 : (i64) -> !llvm.ptr<f32>
+ %0 = memref.alloca() : memref<32x18xf32>
+
+ // Test with explicitly specified alignment. llvm.alloca takes care of the
+ // alignment. The same pointer is thus used for allocation and aligned
+ // accesses.
+ // CHECK: %[[alloca_aligned:.*]] = llvm.alloca %{{.*}} x f32 {alignment = 32 : i64} : (i64) -> !llvm.ptr<f32>
+ // CHECK: %[[desc:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[desc1:.*]] = llvm.insertvalue %[[alloca_aligned]], %[[desc]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: llvm.insertvalue %[[alloca_aligned]], %[[desc1]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+ memref.alloca() {alignment = 32} : memref<32x18xf32>
+ return %0 : memref<32x18xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @static_dealloc
+func @static_dealloc(%static: memref<10x8xf32>) {
+// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
+// CHECK: llvm.call @free(%[[bc]]) : (!llvm.ptr<i8>) -> ()
+  memref.dealloc %static : memref<10x8xf32>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @zero_d_load
+func @zero_d_load(%arg0: memref<f32>) -> f32 {
+// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+// CHECK: %{{.*}} = llvm.load %[[ptr]] : !llvm.ptr<f32>
+  %0 = memref.load %arg0[] : memref<f32>
+  return %0 : f32
+}
+
+// -----
+
+// CHECK-LABEL: func @static_load
+// CHECK:         %[[MEMREF:.*]]: memref<10x42xf32>,
+// CHECK:         %[[I:.*]]: index,
+// CHECK:         %[[J:.*]]: index)
+func @static_load(%static : memref<10x42xf32>, %i : index, %j : index) {
+// CHECK:  %[[II:.*]] = llvm.mlir.cast %[[I]]
+// CHECK:  %[[JJ:.*]] = llvm.mlir.cast %[[J]]
+// CHECK:  %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK:  %[[st0:.*]] = llvm.mlir.constant(42 : index) : i64
+// CHECK:  %[[offI:.*]] = llvm.mul %[[II]], %[[st0]] : i64
+// CHECK:  %[[off1:.*]] = llvm.add %[[offI]], %[[JJ]] : i64
+// CHECK:  %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+// CHECK:  llvm.load %[[addr]] : !llvm.ptr<f32>
+  %0 = memref.load %static[%i, %j] : memref<10x42xf32>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @zero_d_store
+func @zero_d_store(%arg0: memref<f32>, %arg1: f32) {
+// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+// CHECK: llvm.store %{{.*}}, %[[ptr]] : !llvm.ptr<f32>
+  memref.store %arg1, %arg0[] : memref<f32>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @static_store
+// CHECK:         %[[MEMREF:.*]]: memref<10x42xf32>,
+// CHECK-SAME:    %[[I:.*]]: index, %[[J:.*]]: index,
+func @static_store(%static : memref<10x42xf32>, %i : index, %j : index, %val : f32) {
+// CHECK: %[[II:.*]] = llvm.mlir.cast %[[I]]
+// CHECK: %[[JJ:.*]] = llvm.mlir.cast %[[J]]
+// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK: %[[st0:.*]] = llvm.mlir.constant(42 : index) : i64
+// CHECK: %[[offI:.*]] = llvm.mul %[[II]], %[[st0]] : i64
+// CHECK: %[[off1:.*]] = llvm.add %[[offI]], %[[JJ]] : i64
+// CHECK: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+// CHECK: llvm.store %{{.*}}, %[[addr]] : !llvm.ptr<f32>
+
+  memref.store %val, %static[%i, %j] : memref<10x42xf32>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @static_memref_dim
+func @static_memref_dim(%static : memref<42x32x15x13x27xf32>) {
+// CHECK:  llvm.mlir.constant(42 : index) : i64
+  %c0 = constant 0 : index
+  %0 = memref.dim %static, %c0 : memref<42x32x15x13x27xf32>
+// CHECK:  llvm.mlir.constant(32 : index) : i64
+  %c1 = constant 1 : index
+  %1 = memref.dim %static, %c1 : memref<42x32x15x13x27xf32>
+// CHECK:  llvm.mlir.constant(15 : index) : i64
+  %c2 = constant 2 : index
+  %2 = memref.dim %static, %c2 : memref<42x32x15x13x27xf32>
+// CHECK:  llvm.mlir.constant(13 : index) : i64
+  %c3 = constant 3 : index
+  %3 = memref.dim %static, %c3 : memref<42x32x15x13x27xf32>
+// CHECK:  llvm.mlir.constant(27 : index) : i64
+  %c4 = constant 4 : index
+  %4 = memref.dim %static, %c4 : memref<42x32x15x13x27xf32>
+  return
+}
+
+// -----
+
+// Check that consistent types are emitted in address arithmetic in the
+// presence of a data layout specification.
+module attributes { dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32>> } {
+  func @address() {
+    %c1 = constant 1 : index
+    %0 = memref.alloc(%c1) : memref<? x vector<2xf32>>
+    // CHECK: %[[CST_S:.*]] = constant 1 : index
+    // CHECK: %[[CST:.*]] = llvm.mlir.cast
+    // CHECK: llvm.mlir.null
+    // CHECK: llvm.getelementptr %{{.*}}[[CST]]
+    // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
+    // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
+    // CHECK: llvm.add %{{.*}} : i32
+    // CHECK: llvm.call @malloc(%{{.*}}) : (i32) -> !llvm.ptr
+    // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
+    // CHECK: llvm.sub {{.*}} : i32
+    // CHECK: llvm.add {{.*}} : i32
+    // CHECK: llvm.urem {{.*}} : i32
+    // CHECK: llvm.sub {{.*}} : i32
+    // CHECK: llvm.inttoptr %{{.*}} : i32 to !llvm.ptr
+    return
+  }
+}
+
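
The aligned_1d_alloc checks above exercise the malloc-based aligned allocation
path: the lowering over-allocates by `alignment` bytes and then rounds the raw
address up with a sub/add/urem/sub sequence. The same arithmetic as scalar C++,
a sketch mirroring the emitted ops:

  #include <cstdint>

  // Round addr up to the next multiple of alignment. The urem-based sequence
  // matches the lowering above and does not require alignment to be a power
  // of two.
  uint64_t alignUp(uint64_t addr, uint64_t alignment) {
    uint64_t bump = alignment - 1;      // llvm.sub
    uint64_t bumped = addr + bump;      // llvm.add
    uint64_t mod = bumped % alignment;  // llvm.urem
    return bumped - mod;                // llvm.sub
  }
  // e.g. alignUp(0x1003, 8) == 0x1008; alignUp(0x1000, 8) == 0x1000.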

diff  --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
new file mode 100644
index 0000000000000..2c4d386a9b3f1
--- /dev/null
+++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
@@ -0,0 +1,700 @@
+// RUN: mlir-opt -convert-memref-to-llvm %s -split-input-file | FileCheck %s
+// RUN: mlir-opt -convert-memref-to-llvm='index-bitwidth=32' %s -split-input-file | FileCheck --check-prefix=CHECK32 %s
+
+
+// CHECK-LABEL: func @view(
+// CHECK: %[[ARG0F:.*]]: index, %[[ARG1F:.*]]: index, %[[ARG2F:.*]]: index
+func @view(%arg0 : index, %arg1 : index, %arg2 : index) {
+  // CHECK: llvm.mlir.constant(2048 : index) : i64
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  %0 = memref.alloc() : memref<2048xi8>
+
+  // Test two dynamic sizes.
+  // CHECK: %[[ARG2:.*]] = llvm.mlir.cast %[[ARG2F:.*]]
+  // CHECK: %[[ARG0:.*]] = llvm.mlir.cast %[[ARG0F:.*]]
+  // CHECK: %[[ARG1:.*]] = llvm.mlir.cast %[[ARG1F:.*]]
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: %[[SHIFTED_BASE_PTR:.*]] = llvm.getelementptr %[[BASE_PTR]][%[[ARG2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
+  // CHECK: %[[CAST_SHIFTED_BASE_PTR:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR]] : !llvm.ptr<i8> to !llvm.ptr<f32>
+  // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK: llvm.insertvalue %[[C0]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(1 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[ARG0]], %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mul %{{.*}}, %[[ARG1]]
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  %1 = memref.view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref<?x?xf32>
+
+  // Test one dynamic size.
+  // CHECK: %[[ARG2:.*]] = llvm.mlir.cast %[[ARG2F:.*]]
+  // CHECK: %[[ARG1:.*]] = llvm.mlir.cast %[[ARG1F:.*]]
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BASE_PTR_2:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: %[[SHIFTED_BASE_PTR_2:.*]] = llvm.getelementptr %[[BASE_PTR_2]][%[[ARG2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
+  // CHECK: %[[CAST_SHIFTED_BASE_PTR_2:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_2]] : !llvm.ptr<i8> to !llvm.ptr<f32>
+  // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_2]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[C0_2:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK: llvm.insertvalue %[[C0_2]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(1 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(4 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mul %{{.*}}, %[[ARG1]]
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  %3 = memref.view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32>
+
+  // Test static sizes.
+  // CHECK: %[[ARG2:.*]] = llvm.mlir.cast %[[ARG2F]]
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BASE_PTR_3:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: %[[SHIFTED_BASE_PTR_3:.*]] = llvm.getelementptr %[[BASE_PTR_3]][%[[ARG2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
+  // CHECK: %[[CAST_SHIFTED_BASE_PTR_3:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_3]] : !llvm.ptr<i8> to !llvm.ptr<f32>
+  // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_3]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[C0_3:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK: llvm.insertvalue %[[C0_3]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(4 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(1 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(64 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(4 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  %5 = memref.view %0[%arg2][] : memref<2048xi8> to memref<64x4xf32>
+
+  // Test view memory space.
+  // CHECK: llvm.mlir.constant(2048 : index) : i64
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<i8, 4>, ptr<i8, 4>, i64, array<1 x i64>, array<1 x i64>)>
+  %6 = memref.alloc() : memref<2048xi8, 4>
+
+  // CHECK: %[[ARG2:.*]] = llvm.mlir.cast %[[ARG2F]]
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BASE_PTR_4:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i8, 4>, ptr<i8, 4>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: %[[SHIFTED_BASE_PTR_4:.*]] = llvm.getelementptr %[[BASE_PTR_4]][%[[ARG2]]] : (!llvm.ptr<i8, 4>, i64) -> !llvm.ptr<i8, 4>
+  // CHECK: %[[CAST_SHIFTED_BASE_PTR_4:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_4]] : !llvm.ptr<i8, 4> to !llvm.ptr<f32, 4>
+  // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_4]], %{{.*}}[1] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[C0_4:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK: llvm.insertvalue %[[C0_4]], %{{.*}}[2] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(4 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(1 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(64 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.mlir.constant(4 : index) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
+  %7 = memref.view %6[%arg2][] : memref<2048xi8, 4> to memref<64x4xf32, 4>
+
+  return
+}
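+
+// Note for readers (not a FileCheck directive): a ranked memref lowers to the
+// descriptor struct whose fields, in insertvalue/extractvalue order, are the
+// allocated pointer [0], aligned pointer [1], offset [2], sizes array [3] and
+// strides array [4]. As checked above, memref.view reuses the i8 buffer of the
+// source allocation: it shifts the aligned pointer with llvm.getelementptr,
+// bitcasts it to the element type, resets the offset to 0, and fills sizes and
+// strides from the result type, materializing only the dynamic entries from
+// the converted operands.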
+
+// -----
+
+// CHECK-LABEL: func @subview(
+// CHECK:         %[[MEM:.*]]: memref<{{.*}}>,
+// CHECK:         %[[ARG0f:[a-zA-Z0-9]*]]: index,
+// CHECK:         %[[ARG1f:[a-zA-Z0-9]*]]: index,
+// CHECK:         %[[ARG2f:.*]]: index)
+// CHECK32-LABEL: func @subview(
+// CHECK32:         %[[MEM:.*]]: memref<{{.*}}>,
+// CHECK32:         %[[ARG0f:[a-zA-Z0-9]*]]: index,
+// CHECK32:         %[[ARG1f:[a-zA-Z0-9]*]]: index,
+// CHECK32:         %[[ARG2f:.*]]: index)
+func @subview(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg0 : index, %arg1 : index, %arg2 : index) {
+  // CHECK: %[[MEMREF:.*]] = llvm.mlir.cast %[[MEM]]
+  // CHECK32: %[[MEMREF:.*]] = llvm.mlir.cast %[[MEM]]
+
+  // CHECK: %[[ARG0a:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK: %[[ARG1a:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK: %[[ARG0b:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK: %[[ARG1b:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK: %[[ARG0c:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK: %[[ARG1c:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG0a:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK32: %[[ARG1a:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG0b:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK32: %[[ARG1b:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG0c:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK32: %[[ARG1c:.*]] = llvm.mlir.cast %[[ARG1f]]
+
+  // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0a]], %[[STRIDE0]] : i64
+  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i64
+  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1a]], %[[STRIDE1]] : i64
+  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i64
+  // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1c]], %[[STRIDE1]] : i64
+  // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1b]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0c]], %[[STRIDE0]] : i64
+  // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0b]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG0a]], %[[STRIDE0]] : i32
+  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i32
+  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG1a]], %[[STRIDE1]] : i32
+  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i32
+  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1c]], %[[STRIDE1]] : i32
+  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1b]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0c]], %[[STRIDE0]] : i32
+  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0b]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+
+  %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
+    memref<64x4xf32, offset: 0, strides: [4, 1]>
+    to memref<?x?xf32, offset: ?, strides: [?, ?]>
+  return
+}
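+
+// Note for readers (not a FileCheck directive): the subview offset is the
+// linearized index of its origin, off' = off + i0 * stride0 + i1 * stride1,
+// which is exactly the mul/add chain checked above, written schematically with
+// illustrative names:
+//   %t0   = llvm.mul %i0, %stride0 : i64
+//   %off1 = llvm.add %off, %t0 : i64
+//   %t1   = llvm.mul %i1, %stride1 : i64
+//   %off2 = llvm.add %off1, %t1 : i64
+// The result strides are scaled the same way: stride'_k = step_k * stride_k.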
+
+// -----
+
+// CHECK-LABEL: func @subview_non_zero_addrspace(
+// CHECK:         %[[MEM:.*]]: memref<{{.*}}>,
+// CHECK:         %[[ARG0f:[a-zA-Z0-9]*]]: index,
+// CHECK:         %[[ARG1f:[a-zA-Z0-9]*]]: index,
+// CHECK:         %[[ARG2f:.*]]: index)
+// CHECK32-LABEL: func @subview_non_zero_addrspace(
+// CHECK32:         %[[MEM:.*]]: memref<{{.*}}>,
+// CHECK32:         %[[ARG0f:[a-zA-Z0-9]*]]: index,
+// CHECK32:         %[[ARG1f:[a-zA-Z0-9]*]]: index,
+// CHECK32:         %[[ARG2f:.*]]: index)
+func @subview_non_zero_addrspace(%0 : memref<64x4xf32, offset: 0, strides: [4, 1], 3>, %arg0 : index, %arg1 : index, %arg2 : index) {
+  // CHECK: %[[MEMREF:.*]] = llvm.mlir.cast %[[MEM]]
+  // CHECK32: %[[MEMREF:.*]] = llvm.mlir.cast %[[MEM]]
+
+  // CHECK: %[[ARG0a:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK: %[[ARG1a:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK: %[[ARG0b:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK: %[[ARG1b:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK: %[[ARG0c:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK: %[[ARG1c:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG0a:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK32: %[[ARG1a:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG0b:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK32: %[[ARG1b:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG0c:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK32: %[[ARG1c:.*]] = llvm.mlir.cast %[[ARG1f]]
+
+  // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32, 3> to !llvm.ptr<f32, 3>
+  // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32, 3> to !llvm.ptr<f32, 3>
+  // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0a]], %[[STRIDE0]] : i64
+  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i64
+  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1a]], %[[STRIDE1]] : i64
+  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i64
+  // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1c]], %[[STRIDE1]] : i64
+  // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1b]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0c]], %[[STRIDE0]] : i64
+  // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0b]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32, 3> to !llvm.ptr<f32, 3>
+  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32, 3> to !llvm.ptr<f32, 3>
+  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG0a]], %[[STRIDE0]] : i32
+  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i32
+  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG1a]], %[[STRIDE1]] : i32
+  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i32
+  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1c]], %[[STRIDE1]] : i32
+  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1b]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0c]], %[[STRIDE0]] : i32
+  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0b]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
+
+  %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
+    memref<64x4xf32, offset: 0, strides: [4, 1], 3>
+    to memref<?x?xf32, offset: ?, strides: [?, ?], 3>
+  return
+}
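+
+// Note for readers (not a FileCheck directive): this is the same lowering as
+// @subview above; the only difference is that the source memory space (3) is
+// threaded through every pointer type in the descriptor, while the index
+// arithmetic is unchanged.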
+
+// -----
+
+// CHECK-LABEL: func @subview_const_size(
+// CHECK-SAME:         %[[MEM:.*]]: memref<{{.*}}>,
+// CHECK-SAME:         %[[ARG0f:[a-zA-Z0-9]*]]: index
+// CHECK-SAME:         %[[ARG1f:[a-zA-Z0-9]*]]: index
+// CHECK-SAME:         %[[ARG2f:[a-zA-Z0-9]*]]: index
+// CHECK32-LABEL: func @subview_const_size(
+// CHECK32-SAME:         %[[MEM:.*]]: memref<{{.*}}>,
+// CHECK32-SAME:         %[[ARG0f:[a-zA-Z0-9]*]]: index
+// CHECK32-SAME:         %[[ARG1f:[a-zA-Z0-9]*]]: index
+// CHECK32-SAME:         %[[ARG2f:[a-zA-Z0-9]*]]: index
+func @subview_const_size(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg0 : index, %arg1 : index, %arg2 : index) {
+  // CHECK: %[[MEMREF:.*]] = llvm.mlir.cast %[[MEM]]
+  // CHECK32: %[[MEMREF:.*]] = llvm.mlir.cast %[[MEM]]
+
+  // CHECK: %[[ARG0a:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK: %[[ARG1a:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK: %[[ARG0b:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK: %[[ARG1b:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG0a:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK32: %[[ARG1a:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG0b:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK32: %[[ARG1b:.*]] = llvm.mlir.cast %[[ARG1f]]
+
+  // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0a]], %[[STRIDE0]] : i64
+  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i64
+  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1a]], %[[STRIDE1]] : i64
+  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i64
+  // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
+  // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1b]], %[[STRIDE1]] : i64
+  // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[CST2]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
+  // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0b]], %[[STRIDE0]] : i64
+  // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[CST4]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG0a]], %[[STRIDE0]] : i32
+  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i32
+  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG1a]], %[[STRIDE1]] : i32
+  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i32
+  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
+  // CHECK32: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1b]], %[[STRIDE1]] : i32
+  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[CST2]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
+  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0b]], %[[STRIDE0]] : i32
+  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST4]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  %1 = memref.subview %0[%arg0, %arg1][4, 2][%arg0, %arg1] :
+    memref<64x4xf32, offset: 0, strides: [4, 1]>
+    to memref<4x2xf32, offset: ?, strides: [?, ?]>
+  return
+}
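+
+// Note for readers (not a FileCheck directive): with constant result sizes,
+// the lowering materializes them directly as llvm.mlir.constant (2 and 4
+// above) instead of reading size operands, while the strides remain runtime
+// products because the steps are dynamic.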
+
+// -----
+
+// CHECK-LABEL: func @subview_const_stride(
+// CHECK-SAME:         %[[MEM:.*]]: memref<{{.*}}>,
+// CHECK-SAME:         %[[ARG0f:[a-zA-Z0-9]*]]: index
+// CHECK-SAME:         %[[ARG1f:[a-zA-Z0-9]*]]: index
+// CHECK-SAME:         %[[ARG2f:[a-zA-Z0-9]*]]: index
+// CHECK32-LABEL: func @subview_const_stride(
+// CHECK32-SAME:         %[[MEM:.*]]: memref<{{.*}}>,
+// CHECK32-SAME:         %[[ARG0f:[a-zA-Z0-9]*]]: index
+// CHECK32-SAME:         %[[ARG1f:[a-zA-Z0-9]*]]: index
+// CHECK32-SAME:         %[[ARG2f:[a-zA-Z0-9]*]]: index
+func @subview_const_stride(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg0 : index, %arg1 : index, %arg2 : index) {
+  // CHECK: %[[MEMREF:.*]] = llvm.mlir.cast %[[MEM]]
+  // CHECK32: %[[MEMREF:.*]] = llvm.mlir.cast %[[MEM]]
+
+  // CHECK: %[[ARG0a:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK: %[[ARG1a:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK: %[[ARG0b:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK: %[[ARG1b:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG0a:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK32: %[[ARG1a:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG0b:.*]] = llvm.mlir.cast %[[ARG0f]]
+  // CHECK32: %[[ARG1b:.*]] = llvm.mlir.cast %[[ARG1f]]
+
+  // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0a]], %[[STRIDE0]] : i64
+  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i64
+  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1a]], %[[STRIDE1]] : i64
+  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i64
+  // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
+  // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1b]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[CST2]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
+  // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0b]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG0a]], %[[STRIDE0]] : i32
+  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i32
+  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG1a]], %[[STRIDE1]] : i32
+  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i32
+  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
+  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1b]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST2]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
+  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0b]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][1, 2] :
+    memref<64x4xf32, offset: 0, strides: [4, 1]>
+    to memref<?x?xf32, offset: ?, strides: [4, 2]>
+  return
+}
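+
+// Note for readers (not a FileCheck directive): the dual of the test above;
+// constant steps let the result strides fold to constants (4 and 2), while
+// the dynamic sizes are inserted from the converted index arguments.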
+
+// -----
+
+// CHECK-LABEL: func @subview_const_stride_and_offset(
+// CHECK32-LABEL: func @subview_const_stride_and_offset(
+func @subview_const_stride_and_offset(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>) {
+  // The last "insertvalue" that populates the memref descriptor from the function arguments.
+  // CHECK: %[[MEMREF:.*]] = llvm.mlir.cast
+  // CHECK32: %[[MEMREF:.*]] = llvm.mlir.cast
+
+  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[CST8:.*]] = llvm.mlir.constant(8 : index)
+  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[CST8]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[CST3:.*]] = llvm.mlir.constant(3 : i64)
+  // CHECK32: %[[CST1:.*]] = llvm.mlir.constant(1 : i64)
+  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[CST3]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[CST62:.*]] = llvm.mlir.constant(62 : i64)
+  // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
+  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  %1 = memref.subview %0[0, 8][62, 3][1, 1] :
+    memref<64x4xf32, offset: 0, strides: [4, 1]>
+    to memref<62x3xf32, offset: 8, strides: [4, 1]>
+  return
+}
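+
+// Note for readers (not a FileCheck directive): with fully static offsets,
+// sizes and steps, no address arithmetic survives; the offset (8), sizes
+// (62 and 3) and strides (4 and 1) are all emitted as constants straight into
+// the descriptor.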
+
+// -----
+
+// CHECK-LABEL: func @subview_mixed_static_dynamic(
+// CHECK:         %[[MEM:.*]]: memref<{{.*}}>,
+// CHECK:         %[[ARG0f:[a-zA-Z0-9]*]]: index,
+// CHECK:         %[[ARG1f:[a-zA-Z0-9]*]]: index,
+// CHECK:         %[[ARG2f:.*]]: index)
+// CHECK32-LABEL: func @subview_mixed_static_dynamic(
+// CHECK32:         %[[MEM:.*]]: memref<{{.*}}>,
+// CHECK32:         %[[ARG0f:[a-zA-Z0-9]*]]: index,
+// CHECK32:         %[[ARG1f:[a-zA-Z0-9]*]]: index,
+// CHECK32:         %[[ARG2f:.*]]: index)
+func @subview_mixed_static_dynamic(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg0 : index, %arg1 : index, %arg2 : index) {
+  // CHECK32: %[[MEMREF:.*]] = llvm.mlir.cast %[[MEM]]
+  // CHECK32: %[[ARG1:.*]] = llvm.mlir.cast %[[ARG1f]]
+  // CHECK32: %[[ARG2:.*]] = llvm.mlir.cast %[[ARG2f]]
+  // CHECK32: %[[ARG0:.*]] = llvm.mlir.cast %[[ARG0f]]
+
+  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
+  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[OFFM1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE0]] : i32
+  // CHECK32: %[[OFFA1:.*]] = llvm.add %[[OFF]], %[[OFFM1]] : i32
+  // CHECK32: %[[CST8:.*]] = llvm.mlir.constant(8 : i64) : i32
+  // CHECK32: %[[OFFM2:.*]] = llvm.mul %[[CST8]], %[[STRIDE1]] : i32
+  // CHECK32: %[[OFFA2:.*]] = llvm.add %[[OFFA1]], %[[OFFM2]] : i32
+  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFFA2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+
+  // CHECK32: %[[CST1:.*]] = llvm.mlir.constant(1 : i64) : i32
+  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG2]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: %[[CST62:.*]] = llvm.mlir.constant(62 : i64) : i32
+  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i32
+  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
+  %1 = memref.subview %0[%arg1, 8][62, %arg2][%arg0, 1] :
+    memref<64x4xf32, offset: 0, strides: [4, 1]>
+    to memref<62x?xf32, offset: ?, strides: [?, 1]>
+  return
+}
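+
+// Note for readers (not a FileCheck directive): static and dynamic operands
+// mix freely; constant offsets and sizes are materialized with
+// llvm.mlir.constant (8 and 62 above) and feed the same mul/add chain as the
+// llvm.mlir.cast-converted dynamic arguments.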
+
+// -----
+
+// CHECK-LABEL: func @subview_leading_operands(
+func @subview_leading_operands(%0 : memref<5x3xf32>, %1: memref<5x?xf32>) {
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Alloc ptr
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Aligned ptr
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Offset
+  // CHECK: %[[C6:.*]] = llvm.mlir.constant(6 : index) : i64
+  // CHECK: llvm.insertvalue %[[C6]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Sizes and strides @rank 1: both static.
+  // CHECK: %[[C3:.*]] = llvm.mlir.constant(3 : i64) : i64
+  // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[C1]], %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Sizes and strides @rank 0: both static, extracted from the type.
+  // CHECK: %[[C3_2:.*]] = llvm.mlir.constant(3 : i64) : i64
+  // CHECK: %[[C3_3:.*]] = llvm.mlir.constant(3 : i64) : i64
+  // CHECK: llvm.insertvalue %[[C3_2]], %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[C3_3]], %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  %2 = memref.subview %0[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]>
+
+  return
+}
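+
+// Note for readers (not a FileCheck directive): with every operand static,
+// even the origin offset is folded: 2 * stride0 (= 2 * 3 = 6) is emitted as a
+// single constant, matching the "offset: 6" in the result type.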
+
+// -----
+
+// CHECK-LABEL: func @subview_leading_operands_dynamic(
+func @subview_leading_operands_dynamic(%0 : memref<5x?xf32>) {
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Alloc ptr
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Aligned ptr
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Extract strides
+  // CHECK: %[[ST0:.*]] = llvm.extractvalue %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[ST1:.*]] = llvm.extractvalue %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Compute and insert the offset: original offset plus 2 * stride0.
+  // CHECK: %[[OFF:.*]] = llvm.extractvalue %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: %[[MUL:.*]] = llvm.mul %[[C2]], %[[ST0]] : i64
+  // CHECK: %[[NEW_OFF:.*]] = llvm.add %[[OFF]], %[[MUL]] : i64
+  // CHECK: llvm.insertvalue %[[NEW_OFF]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Sizes and strides @rank 1: static stride 1, dynamic size unchanged from source memref.
+  // CHECK: %[[SZ1:.*]] = llvm.extractvalue %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[C1]], %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Sizes and strides @rank 0: static size, stride scaled by the dynamic source stride.
+  // CHECK: %[[C3:.*]] = llvm.mlir.constant(3 : i64) : i64
+  // CHECK: %[[C1_2:.*]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: %[[MUL:.*]] = llvm.mul %[[C1_2]], %[[ST0]] : i64
+  // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[MUL]], %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  %1 = memref.subview %0[2][3][1]: memref<5x?xf32> to memref<3x?xf32, offset: ?, strides: [?, 1]>
+  return
+}
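+
+// Note for readers (not a FileCheck directive): with a dynamic source
+// dimension, stride0 is no longer a compile-time constant, so both the offset
+// (2 * stride0) and the rank-0 result stride (1 * stride0) are computed at
+// runtime from the extracted stride.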
+
+// -----
+
+// CHECK-LABEL: func @subview_rank_reducing_leading_operands(
+func @subview_rank_reducing_leading_operands(%0 : memref<5x3xf32>) {
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  // Alloc ptr
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  // Aligned ptr
+  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  // Extract strides
+  // CHECK: %[[ST0:.*]] = llvm.extractvalue %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[ST1:.*]] = llvm.extractvalue %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // Offset
+  // CHECK: %[[C3:.*]] = llvm.mlir.constant(3 : index) : i64
+  // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  // Sizes and strides @rank 0: both static.
+  // CHECK: %[[C3:.*]] = llvm.mlir.constant(3 : i64) : i64
+  // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: llvm.insertvalue %[[C1]], %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  %1 = memref.subview %0[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]>
+
+  return
+}
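+
+// Note for readers (not a FileCheck directive): rank reduction shows up as a
+// descriptor type change: sizes and strides are read from the rank-2 source
+// struct, but the result descriptor carries rank-1 arrays (array<1 x i64>);
+// the dropped leading index contributes 1 * stride0 (= 3) to the constant
+// offset.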
+
+// -----
+
+// CHECK-LABEL: func @assume_alignment
+func @assume_alignment(%0 : memref<4x4xf16>) {
+  // CHECK: %[[PTR:.*]] = llvm.extractvalue %[[MEMREF:.*]][1] : !llvm.struct<(ptr<f16>, ptr<f16>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK-NEXT: %[[ZERO:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK-NEXT: %[[MASK:.*]] = llvm.mlir.constant(15 : index) : i64
+  // CHECK-NEXT: %[[INT:.*]] = llvm.ptrtoint %[[PTR]] : !llvm.ptr<f16> to i64
+  // CHECK-NEXT: %[[MASKED_PTR:.*]] = llvm.and %[[INT]], %[[MASK]] : i64
+  // CHECK-NEXT: %[[CONDITION:.*]] = llvm.icmp "eq" %[[MASKED_PTR]], %[[ZERO]] : i64
+  // CHECK-NEXT: "llvm.intr.assume"(%[[CONDITION]]) : (i1) -> ()
+  memref.assume_alignment %0, 16 : memref<4x4xf16>
+  return
+}
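+
+// Note for readers (not a FileCheck directive): the alignment assumption is
+// encoded, schematically, as
+//   %masked = llvm.and (llvm.ptrtoint %ptr), 15
+//   "llvm.intr.assume"(%masked == 0)
+// i.e. the low log2(16) bits of the address are zero; the mask 15 is the
+// requested alignment minus one.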
+
+// -----
+
+// CHECK-LABEL: func @dim_of_unranked
+// CHECK32-LABEL: func @dim_of_unranked
+func @dim_of_unranked(%unranked: memref<*xi32>) -> index {
+  %c0 = constant 0 : index
+  %dim = memref.dim %unranked, %c0 : memref<*xi32>
+  return %dim : index
+}
+// CHECK: %[[UNRANKED_DESC:.*]] = llvm.mlir.cast
+
+// CHECK: %[[RANKED_DESC:.*]] = llvm.extractvalue %[[UNRANKED_DESC]][1]
+// CHECK-SAME:   : !llvm.struct<(i64, ptr<i8>)>
+
+// CHECK: %[[ZERO_D_DESC:.*]] = llvm.bitcast %[[RANKED_DESC]]
+// CHECK-SAME:   : !llvm.ptr<i8> to !llvm.ptr<struct<(ptr<i32>, ptr<i32>, i64)>>
+
+// CHECK: %[[C2_i32:.*]] = llvm.mlir.constant(2 : i32) : i32
+// CHECK: %[[C0_:.*]] = llvm.mlir.constant(0 : index) : i64
+
+// CHECK: %[[OFFSET_PTR:.*]] = llvm.getelementptr %[[ZERO_D_DESC]]{{\[}}
+// CHECK-SAME:   %[[C0_]], %[[C2_i32]]] : (!llvm.ptr<struct<(ptr<i32>, ptr<i32>,
+// CHECK-SAME:   i64)>>, i64, i32) -> !llvm.ptr<i64>
+
+// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK: %[[INDEX_INC:.*]] = llvm.add %[[C1]], %{{.*}} : i64
+
+// CHECK: %[[SIZE_PTR:.*]] = llvm.getelementptr %[[OFFSET_PTR]]{{\[}}
+// CHECK-SAME:   %[[INDEX_INC]]] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
+
+// CHECK: %[[SIZE:.*]] = llvm.load %[[SIZE_PTR]] : !llvm.ptr<i64>
+
+// CHECK32: %[[SIZE:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
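+
+// Note for readers (not a FileCheck directive): an unranked memref lowers to
+// !llvm.struct<(i64, ptr<i8>)>, the rank plus a type-erased pointer to a
+// ranked descriptor. memref.dim bitcasts that pointer to a rank-0 descriptor,
+// GEPs to the offset field (index 2, past the two pointers), then loads
+// sizes[%i] at position 1 + %i relative to that field, which is the
+// add/getelementptr/load sequence checked above.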
+
+// -----
+
+// CHECK-LABEL: func @address_space(
+func @address_space(%arg0 : memref<32xf32, affine_map<(d0) -> (d0)>, 7>) {
+  %0 = memref.alloc() : memref<32xf32, affine_map<(d0) -> (d0)>, 5>
+  %1 = constant 7 : index
+  // CHECK: llvm.load %{{.*}} : !llvm.ptr<f32, 5>
+  %2 = memref.load %0[%1] : memref<32xf32, affine_map<(d0) -> (d0)>, 5>
+  return
+}
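+
+// Note for readers (not a FileCheck directive): a numeric memref memory space
+// maps directly to the LLVM pointer address space, so the load above is
+// emitted on !llvm.ptr<f32, 5>.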
+
+// -----
+
+// CHECK-LABEL: func @transpose
+//       CHECK:   llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
+//       CHECK:    llvm.insertvalue {{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
+//       CHECK:    llvm.insertvalue {{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
+//       CHECK:    llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
+//       CHECK:   llvm.extractvalue {{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
+//       CHECK:    llvm.insertvalue {{.*}}[3, 2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
+//       CHECK:   llvm.extractvalue {{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
+//       CHECK:    llvm.insertvalue {{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
+//       CHECK:   llvm.extractvalue {{.*}}[3, 2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
+//       CHECK:    llvm.insertvalue {{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
+func @transpose(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
+  %0 = memref.transpose %arg0 (i, j, k) -> (k, i, j) : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2 * s1 + s0 + d0 * s2 + d1)>>
+  return
+}
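+
+// Note for readers (not a FileCheck directive): memref.transpose moves no
+// data. It copies the two pointers and the offset into a fresh descriptor and
+// permutes the size (and stride) entries; as checked above, source size [3, 0]
+// lands at [3, 2], [3, 1] at [3, 0], and [3, 2] at [3, 1], so only
+// extractvalue/insertvalue pairs are emitted.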
+
+// -----
+
+// CHECK: llvm.mlir.global external @gv0() : !llvm.array<2 x f32>
+memref.global @gv0 : memref<2xf32> = uninitialized
+
+// CHECK: llvm.mlir.global private @gv1() : !llvm.array<2 x f32>
+memref.global "private" @gv1 : memref<2xf32>
+
+// CHECK: llvm.mlir.global external @gv2(dense<{{\[\[}}0.000000e+00, 1.000000e+00, 2.000000e+00], [3.000000e+00, 4.000000e+00, 5.000000e+00]]> : tensor<2x3xf32>) : !llvm.array<2 x array<3 x f32>>
+memref.global @gv2 : memref<2x3xf32> = dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]>
+
+// Test 1D memref.
+// CHECK-LABEL: func @get_gv0_memref
+func @get_gv0_memref() {
+  %0 = memref.get_global @gv0 : memref<2xf32>
+  // CHECK: %[[DIM:.*]] = llvm.mlir.constant(2 : index) : i64
+  // CHECK: %[[STRIDE:.*]] = llvm.mlir.constant(1 : index) : i64
+  // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @gv0 : !llvm.ptr<array<2 x f32>>
+  // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ADDR]][%[[ZERO]], %[[ZERO]]] : (!llvm.ptr<array<2 x f32>>, i64, i64) -> !llvm.ptr<f32>
+  // CHECK: %[[DEADBEEF:.*]] = llvm.mlir.constant(3735928559 : index) : i64
+  // CHECK: %[[DEADBEEFPTR:.*]] = llvm.inttoptr %[[DEADBEEF]] : i64 to !llvm.ptr<f32>
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: llvm.insertvalue %[[DEADBEEFPTR]], {{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: llvm.insertvalue %[[GEP]], {{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: %[[OFFSET:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK: llvm.insertvalue %[[OFFSET]], {{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: llvm.insertvalue %[[DIM]], {{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  // CHECK: llvm.insertvalue %[[STRIDE]], {{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
+  return
+}
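+
+// Note for readers (not a FileCheck directive): a global has no heap buffer,
+// so the lowering stores the sentinel 0xDEADBEEF (3735928559) as the
+// "allocated" pointer, presumably to trap accidental deallocation, while the
+// aligned pointer is a getelementptr into the llvm.mlir.global itself.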
+
+// Test 2D memref.
+// CHECK-LABEL: func @get_gv2_memref
+func @get_gv2_memref() {
+  // CHECK: %[[DIM0:.*]] = llvm.mlir.constant(2 : index) : i64
+  // CHECK: %[[DIM1:.*]] = llvm.mlir.constant(3 : index) : i64
+  // CHECK: %[[STRIDE1:.*]] = llvm.mlir.constant(1 : index) : i64
+  // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @gv2 : !llvm.ptr<array<2 x array<3 x f32>>>
+  // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ADDR]][%[[ZERO]], %[[ZERO]], %[[ZERO]]] : (!llvm.ptr<array<2 x array<3 x f32>>>, i64, i64, i64) -> !llvm.ptr<f32>
+  // CHECK: %[[DEADBEEF:.*]] = llvm.mlir.constant(3735928559 : index) : i64
+  // CHECK: %[[DEADBEEFPTR:.*]] = llvm.inttoptr %[[DEADBEEF]] : i64 to !llvm.ptr<f32>
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[DEADBEEFPTR]], {{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[GEP]], {{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: %[[OFFSET:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK: llvm.insertvalue %[[OFFSET]], {{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[DIM0]], {{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[DIM1]], {{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[DIM1]], {{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+  // CHECK: llvm.insertvalue %[[STRIDE1]], {{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+
+  %0 = memref.get_global @gv2 : memref<2x3xf32>
+  return
+}
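+
+// Note for readers (not a FileCheck directive): for the 2-D global, the
+// row-major strides are derived from the static shape: stride0 = 3 (DIM1 is
+// reused in the [4, 0] insertion above) and stride1 = 1.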
+
+// Test scalar memref.
+// CHECK: llvm.mlir.global external @gv3(1.000000e+00 : f32) : f32
+memref.global @gv3 : memref<f32> = dense<1.0>
+
+// CHECK-LABEL: func @get_gv3_memref
+func @get_gv3_memref() {
+  // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @gv3 : !llvm.ptr<f32>
+  // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ADDR]][%[[ZERO]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
+  // CHECK: %[[DEADBEEF:.*]] = llvm.mlir.constant(3735928559 : index) : i64
+  // CHECK: %[[DEADBEEFPTR:.*]] = llvm.inttoptr %[[DEADBEEF]] : i64 to !llvm.ptr<f32>
+  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+  // CHECK: llvm.insertvalue %[[DEADBEEFPTR]], {{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+  // CHECK: llvm.insertvalue %[[GEP]], {{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+  // CHECK: %[[OFFSET:.*]] = llvm.mlir.constant(0 : index) : i64
+  // CHECK: llvm.insertvalue %[[OFFSET]], {{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
+  %0 = memref.get_global @gv3 : memref<f32>
+  return
+}
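+
+// Note for readers (not a FileCheck directive): a rank-0 memref descriptor
+// drops the size/stride arrays entirely, leaving just
+// !llvm.struct<(ptr<f32>, ptr<f32>, i64)>: the two pointers and the offset.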
+

diff  --git a/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir b/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir
index e0fc24ee13339..c0115b01af4aa 100644
--- a/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -convert-std-to-llvm='emit-c-wrappers=1' %s | FileCheck %s
-// RUN: mlir-opt -convert-std-to-llvm %s | FileCheck %s --check-prefix=EMIT_C_ATTRIBUTE
+// RUN: mlir-opt -convert-memref-to-llvm -convert-std-to-llvm='emit-c-wrappers=1' %s | FileCheck %s
+// RUN: mlir-opt -convert-memref-to-llvm -convert-std-to-llvm %s | FileCheck %s --check-prefix=EMIT_C_ATTRIBUTE
 
 // This tests the default memref calling convention and the emission of C
 // wrappers. We don't need to separate runs because the wrapper-emission

diff  --git a/mlir/test/Conversion/StandardToLLVM/convert-argattrs.mlir b/mlir/test/Conversion/StandardToLLVM/convert-argattrs.mlir
index 49a5a2f2f871d..7c9a0c19790eb 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-argattrs.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-argattrs.mlir
@@ -4,8 +4,6 @@
 // When expanding the memref to multiple arguments, argument attributes are replicated.
 // CHECK-COUNT-7: {dialect.a = true, dialect.b = 4 : i64}
 func @check_attributes(%static: memref<10x20xf32> {dialect.a = true, dialect.b = 4 : i64 }) {
-  %c0 = constant 0 : index
-  %0 = memref.load %static[%c0, %c0]: memref<10x20xf32>
   return
 }
 

diff  --git a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir
deleted file mode 100644
index aeb222630afba..0000000000000
--- a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir
+++ /dev/null
@@ -1,450 +0,0 @@
-// RUN: mlir-opt -convert-std-to-llvm -split-input-file %s | FileCheck %s
-// RUN: mlir-opt -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -split-input-file %s | FileCheck %s --check-prefix=BAREPTR
-
-// BAREPTR-LABEL: func @check_noalias
-// BAREPTR-SAME: %{{.*}}: !llvm.ptr<f32> {llvm.noalias}, %{{.*}}: !llvm.ptr<f32> {llvm.noalias}
-func @check_noalias(%static : memref<2xf32> {llvm.noalias}, %other : memref<2xf32> {llvm.noalias}) {
-    return
-}
-
-// -----
-
-// CHECK-LABEL: func @check_static_return
-// CHECK-COUNT-2: !llvm.ptr<f32>
-// CHECK-COUNT-5: i64
-// CHECK-SAME: -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-LABEL: func @check_static_return
-// BAREPTR-SAME: (%[[arg:.*]]: !llvm.ptr<f32>) -> !llvm.ptr<f32> {
-func @check_static_return(%static : memref<32x18xf32>) -> memref<32x18xf32> {
-// CHECK:  llvm.return %{{.*}} : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-
-// BAREPTR: %[[udf:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[base0:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[val0:.*]] = llvm.mlir.constant(0 : index) : i64
-// BAREPTR-NEXT: %[[ins0:.*]] = llvm.insertvalue %[[val0]], %[[aligned]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[val1:.*]] = llvm.mlir.constant(32 : index) : i64
-// BAREPTR-NEXT: %[[ins1:.*]] = llvm.insertvalue %[[val1]], %[[ins0]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[val2:.*]] = llvm.mlir.constant(18 : index) : i64
-// BAREPTR-NEXT: %[[ins2:.*]] = llvm.insertvalue %[[val2]], %[[ins1]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[val3:.*]] = llvm.mlir.constant(18 : index) : i64
-// BAREPTR-NEXT: %[[ins3:.*]] = llvm.insertvalue %[[val3]], %[[ins2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[val4:.*]] = llvm.mlir.constant(1 : index) : i64
-// BAREPTR-NEXT: %[[ins4:.*]] = llvm.insertvalue %[[val4]], %[[ins3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[base1:.*]] = llvm.extractvalue %[[ins4]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: llvm.return %[[base1]] : !llvm.ptr<f32>
-  return %static : memref<32x18xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @check_static_return_with_offset
-// CHECK-COUNT-2: !llvm.ptr<f32>
-// CHECK-COUNT-5: i64
-// CHECK-SAME: -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-LABEL: func @check_static_return_with_offset
-// BAREPTR-SAME: (%[[arg:.*]]: !llvm.ptr<f32>) -> !llvm.ptr<f32> {
-func @check_static_return_with_offset(%static : memref<32x18xf32, offset:7, strides:[22,1]>) -> memref<32x18xf32, offset:7, strides:[22,1]> {
-// CHECK:  llvm.return %{{.*}} : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-
-// BAREPTR: %[[udf:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[base0:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[val0:.*]] = llvm.mlir.constant(7 : index) : i64
-// BAREPTR-NEXT: %[[ins0:.*]] = llvm.insertvalue %[[val0]], %[[aligned]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[val1:.*]] = llvm.mlir.constant(32 : index) : i64
-// BAREPTR-NEXT: %[[ins1:.*]] = llvm.insertvalue %[[val1]], %[[ins0]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[val2:.*]] = llvm.mlir.constant(22 : index) : i64
-// BAREPTR-NEXT: %[[ins2:.*]] = llvm.insertvalue %[[val2]], %[[ins1]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[val3:.*]] = llvm.mlir.constant(18 : index) : i64
-// BAREPTR-NEXT: %[[ins3:.*]] = llvm.insertvalue %[[val3]], %[[ins2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[val4:.*]] = llvm.mlir.constant(1 : index) : i64
-// BAREPTR-NEXT: %[[ins4:.*]] = llvm.insertvalue %[[val4]], %[[ins3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[base1:.*]] = llvm.extractvalue %[[ins4]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: llvm.return %[[base1]] : !llvm.ptr<f32>
-  return %static : memref<32x18xf32, offset:7, strides:[22,1]>
-}
-
-// -----
-
-// CHECK-LABEL: func @zero_d_alloc() -> !llvm.struct<(ptr<f32>, ptr<f32>, i64)> {
-// BAREPTR-LABEL: func @zero_d_alloc() -> !llvm.ptr<f32> {
-func @zero_d_alloc() -> memref<f32> {
-// CHECK-NEXT:  %[[one:.*]] = llvm.mlir.constant(1 : index) : i64
-// CHECK-NEXT:  %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
-// CHECK-NEXT:  %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-// CHECK-NEXT:  %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-// CHECK-NEXT:  llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr<i8>
-// CHECK-NEXT:  %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<f32>
-// CHECK-NEXT:  llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// CHECK-NEXT:  llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// CHECK-NEXT:  llvm.insertvalue %[[ptr]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// CHECK-NEXT:  %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
-// CHECK-NEXT:  llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-
-// BAREPTR-NEXT:  %[[one:.*]] = llvm.mlir.constant(1 : index) : i64
-// BAREPTR-NEXT:  %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
-// BAREPTR-NEXT:  %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-// BAREPTR-NEXT:  %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-// BAREPTR-NEXT:  llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr<i8>
-// BAREPTR-NEXT:  %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<f32>
-// BAREPTR-NEXT:  llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// BAREPTR-NEXT:  llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// BAREPTR-NEXT:  llvm.insertvalue %[[ptr]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// BAREPTR-NEXT:  %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
-// BAREPTR-NEXT:  llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-  %0 = memref.alloc() : memref<f32>
-  return %0 : memref<f32>
-}
-
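The llvm.mlir.null / llvm.getelementptr / llvm.ptrtoint prelude in the alloc
tests computes the element size in bytes without assuming a data layout. The
same trick as a C sketch (pointer arithmetic on null is undefined behavior in
portable C, but it mirrors the generated IR):

    #include <stdlib.h>
    void *alloc_one_float(void) {
      /* "gep null[1]" then ptrtoint yields sizeof(float). */
      size_t size_bytes = (size_t)((float *)0 + 1);
      return malloc(size_bytes); /* llvm.call @malloc */
    }
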
-// -----
-
-// CHECK-LABEL: func @zero_d_dealloc
-// BAREPTR-LABEL: func @zero_d_dealloc(%{{.*}}: !llvm.ptr<f32>) {
-func @zero_d_dealloc(%arg0: memref<f32>) {
-//      CHECK:  %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// CHECK-NEXT:  %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
-// CHECK-NEXT:  llvm.call @free(%[[bc]]) : (!llvm.ptr<i8>) -> ()
-
-// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// BAREPTR-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
-// BAREPTR-NEXT: llvm.call @free(%[[bc]]) : (!llvm.ptr<i8>) -> ()
-  memref.dealloc %arg0 : memref<f32>
-  return
-}
-
-// -----
-
-// CHECK-LABEL: func @aligned_1d_alloc(
-// BAREPTR-LABEL: func @aligned_1d_alloc(
-func @aligned_1d_alloc() -> memref<42xf32> {
-// CHECK-NEXT:  %[[sz1:.*]] = llvm.mlir.constant(42 : index) : i64
-// CHECK-NEXT:  %[[st1:.*]] = llvm.mlir.constant(1 : index) : i64
-// CHECK-NEXT:  %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
-// CHECK-NEXT:  %[[gep:.*]] = llvm.getelementptr %[[null]][%[[sz1]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-// CHECK-NEXT:  %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-// CHECK-NEXT:  %[[alignment:.*]] = llvm.mlir.constant(8 : index) : i64
-// CHECK-NEXT:  %[[allocsize:.*]] = llvm.add %[[size_bytes]], %[[alignment]] : i64
-// CHECK-NEXT:  %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (i64) -> !llvm.ptr<i8>
-// CHECK-NEXT:  %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<f32>
-// CHECK-NEXT:  %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[ptr]] : !llvm.ptr<f32> to i64
-// CHECK-NEXT:  %[[one_1:.*]] = llvm.mlir.constant(1 : index) : i64
-// CHECK-NEXT:  %[[bump:.*]] = llvm.sub %[[alignment]], %[[one_1]] : i64
-// CHECK-NEXT:  %[[bumped:.*]] = llvm.add %[[allocatedAsInt]], %[[bump]] : i64
-// CHECK-NEXT:  %[[mod:.*]] = llvm.urem %[[bumped]], %[[alignment]] : i64
-// CHECK-NEXT:  %[[aligned:.*]] = llvm.sub %[[bumped]], %[[mod]] : i64
-// CHECK-NEXT:  %[[alignedBitCast:.*]] = llvm.inttoptr %[[aligned]] : i64 to !llvm.ptr<f32>
-// CHECK-NEXT:  llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-// CHECK-NEXT:  llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-// CHECK-NEXT:  llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-// CHECK-NEXT:  %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
-// CHECK-NEXT:  llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-
-// BAREPTR-NEXT:  %[[sz1:.*]] = llvm.mlir.constant(42 : index) : i64
-// BAREPTR-NEXT:  %[[st1:.*]] = llvm.mlir.constant(1 : index) : i64
-// BAREPTR-NEXT:  %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
-// BAREPTR-NEXT:  %[[gep:.*]] = llvm.getelementptr %[[null]][%[[sz1]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-// BAREPTR-NEXT:  %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-// BAREPTR-NEXT:  %[[alignment:.*]] = llvm.mlir.constant(8 : index) : i64
-// BAREPTR-NEXT:  %[[allocsize:.*]] = llvm.add %[[size_bytes]], %[[alignment]] : i64
-// BAREPTR-NEXT:  %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (i64) -> !llvm.ptr<i8>
-// BAREPTR-NEXT:  %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<f32>
-// BAREPTR-NEXT:  %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[ptr]] : !llvm.ptr<f32> to i64
-// BAREPTR-NEXT:  %[[one_2:.*]] = llvm.mlir.constant(1 : index) : i64
-// BAREPTR-NEXT:  %[[bump:.*]] = llvm.sub %[[alignment]], %[[one_2]] : i64
-// BAREPTR-NEXT:  %[[bumped:.*]] = llvm.add %[[allocatedAsInt]], %[[bump]] : i64
-// BAREPTR-NEXT:  %[[mod:.*]] = llvm.urem %[[bumped]], %[[alignment]] : i64
-// BAREPTR-NEXT:  %[[aligned:.*]] = llvm.sub %[[bumped]], %[[mod]] : i64
-// BAREPTR-NEXT:  %[[alignedBitCast:.*]] = llvm.inttoptr %[[aligned]] : i64 to !llvm.ptr<f32>
-// BAREPTR-NEXT:  llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-// BAREPTR-NEXT:  llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-// BAREPTR-NEXT:  llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-// BAREPTR-NEXT:  %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
-// BAREPTR-NEXT:  llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-  %0 = memref.alloc() {alignment = 8} : memref<42xf32>
-  return %0 : memref<42xf32>
-}
-
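The aligned allocation above over-allocates by the alignment amount and rounds
the malloc'ed address up with the sub/add/urem/sub sequence. The same
arithmetic as a C sketch (names are illustrative):

    #include <stdint.h>
    /* Round addr up to a multiple of alignment. With alignment = 8 and
       addr = 0x1003: bumped = 0x100A, mod = 2, result = 0x1008. */
    uintptr_t align_up(uintptr_t addr, uintptr_t alignment) {
      uintptr_t bump   = alignment - 1;      /* llvm.sub  */
      uintptr_t bumped = addr + bump;        /* llvm.add  */
      uintptr_t mod    = bumped % alignment; /* llvm.urem */
      return bumped - mod;                   /* llvm.sub  */
    }
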
-// -----
-
-// CHECK-LABEL: func @static_alloc() -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> {
-// BAREPTR-LABEL: func @static_alloc() -> !llvm.ptr<f32> {
-func @static_alloc() -> memref<32x18xf32> {
-//      CHECK:  %[[num_elems:.*]] = llvm.mlir.constant(576 : index) : i64
-// CHECK-NEXT:  %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
-// CHECK-NEXT:  %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-// CHECK-NEXT:  %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-// CHECK-NEXT:  %[[allocated:.*]] = llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr<i8>
-// CHECK-NEXT:  llvm.bitcast %[[allocated]] : !llvm.ptr<i8> to !llvm.ptr<f32>
-
-// BAREPTR:      %[[num_elems:.*]] = llvm.mlir.constant(576 : index) : i64
-// BAREPTR-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
-// BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-// BAREPTR-NEXT: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-// BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr<i8>
-// BAREPTR-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr<i8> to !llvm.ptr<f32>
- %0 = memref.alloc() : memref<32x18xf32>
- return %0 : memref<32x18xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @static_alloca() -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)> {
-func @static_alloca() -> memref<32x18xf32> {
-// CHECK-NEXT:  %[[sz1:.*]] = llvm.mlir.constant(32 : index) : i64
-// CHECK-NEXT:  %[[sz2:.*]] = llvm.mlir.constant(18 : index) : i64
-// CHECK-NEXT:  %[[st2:.*]] = llvm.mlir.constant(1 : index) : i64
-// CHECK-NEXT:  %[[num_elems:.*]] = llvm.mlir.constant(576 : index) : i64
-// CHECK-NEXT:  %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
-// CHECK-NEXT:  %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-// CHECK-NEXT:  %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-// CHECK-NEXT:  %[[allocated:.*]] = llvm.alloca %[[size_bytes]] x f32 : (i64) -> !llvm.ptr<f32>
- %0 = memref.alloca() : memref<32x18xf32>
-
- // Test with explicitly specified alignment. llvm.alloca takes care of the
- // alignment. The same pointer is thus used for allocation and aligned
- // accesses.
- // CHECK: %[[alloca_aligned:.*]] = llvm.alloca %{{.*}} x f32 {alignment = 32 : i64} : (i64) -> !llvm.ptr<f32>
- // CHECK: %[[desc:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
- // CHECK: %[[desc1:.*]] = llvm.insertvalue %[[alloca_aligned]], %[[desc]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
- // CHECK: llvm.insertvalue %[[alloca_aligned]], %[[desc1]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
- memref.alloca() {alignment = 32} : memref<32x18xf32>
- return %0 : memref<32x18xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @static_dealloc
-// BAREPTR-LABEL: func @static_dealloc(%{{.*}}: !llvm.ptr<f32>) {
-func @static_dealloc(%static: memref<10x8xf32>) {
-//      CHECK:  %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK-NEXT:  %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
-// CHECK-NEXT:  llvm.call @free(%[[bc]]) : (!llvm.ptr<i8>) -> ()
-
-// BAREPTR:      %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
-// BAREPTR-NEXT: llvm.call @free(%[[bc]]) : (!llvm.ptr<i8>) -> ()
-  memref.dealloc %static : memref<10x8xf32>
-  return
-}
-
-// -----
-
-// CHECK-LABEL: func @zero_d_load
-// BAREPTR-LABEL: func @zero_d_load(%{{.*}}: !llvm.ptr<f32>) -> f32
-func @zero_d_load(%arg0: memref<f32>) -> f32 {
-//      CHECK:  %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// CHECK-NEXT:  %{{.*}} = llvm.load %[[ptr]] : !llvm.ptr<f32>
-
-// BAREPTR:      %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// BAREPTR-NEXT: llvm.load %[[ptr:.*]] : !llvm.ptr<f32>
-  %0 = memref.load %arg0[] : memref<f32>
-  return %0 : f32
-}
-
-// -----
-
-// CHECK-LABEL: func @static_load(
-// CHECK-COUNT-2: !llvm.ptr<f32>,
-// CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: i64
-// CHECK:         %[[I:.*]]: i64,
-// CHECK:         %[[J:.*]]: i64)
-// BAREPTR-LABEL: func @static_load
-// BAREPTR-SAME: (%[[A:.*]]: !llvm.ptr<f32>, %[[I:.*]]: i64, %[[J:.*]]: i64) {
-func @static_load(%static : memref<10x42xf32>, %i : index, %j : index) {
-//       CHECK:  %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-//  CHECK-NEXT:  %[[st0:.*]] = llvm.mlir.constant(42 : index) : i64
-//  CHECK-NEXT:  %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : i64
-//  CHECK-NEXT:  %[[off1:.*]] = llvm.add %[[offI]], %[[J]] : i64
-//  CHECK-NEXT:  %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-//  CHECK-NEXT:  llvm.load %[[addr]] : !llvm.ptr<f32>
-
-// BAREPTR:      %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[st0:.*]] = llvm.mlir.constant(42 : index) : i64
-// BAREPTR-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : i64
-// BAREPTR-NEXT: %[[off1:.*]] = llvm.add %[[offI]], %[[J]] : i64
-// BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-// BAREPTR-NEXT: llvm.load %[[addr]] : !llvm.ptr<f32>
-  %0 = memref.load %static[%i, %j] : memref<10x42xf32>
-  return
-}
-
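In static_load the strides come straight from the static shape: for
memref<10x42xf32> the row stride is 42 and the column stride is 1, so the
mul/add/gep sequence above is equivalent to this C sketch (the function name
is hypothetical):

    #include <stdint.h>
    /* %static[%i, %j] on memref<10x42xf32>; for example i = 3, j = 5
       gives the linear offset 3 * 42 + 5 = 131. */
    float load_static(const float *aligned, int64_t i, int64_t j) {
      int64_t off = i * 42 + j; /* llvm.mul + llvm.add */
      return aligned[off];      /* llvm.getelementptr + llvm.load */
    }
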
-// -----
-
-// CHECK-LABEL: func @zero_d_store
-// BAREPTR-LABEL: func @zero_d_store
-// BAREPTR-SAME: (%[[A:.*]]: !llvm.ptr<f32>, %[[val:.*]]: f32)
-func @zero_d_store(%arg0: memref<f32>, %arg1: f32) {
-//      CHECK:  %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// CHECK-NEXT:  llvm.store %{{.*}}, %[[ptr]] : !llvm.ptr<f32>
-
-// BAREPTR:      %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-// BAREPTR-NEXT: llvm.store %[[val]], %[[ptr]] : !llvm.ptr<f32>
-  memref.store %arg1, %arg0[] : memref<f32>
-  return
-}
-
-// -----
-
-// CHECK-LABEL: func @static_store
-// CHECK-SAME:         %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<f32>
-// CHECK-SAME:         %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<f32>
-// CHECK-SAME:         %[[ARG2:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG3:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG4:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG5:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG6:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[I:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[J:[a-zA-Z0-9]*]]: i64
-// BAREPTR-LABEL: func @static_store
-// BAREPTR-SAME: %[[A:.*]]: !llvm.ptr<f32>
-// BAREPTR-SAME:         %[[I:[a-zA-Z0-9]*]]: i64
-// BAREPTR-SAME:         %[[J:[a-zA-Z0-9]*]]: i64
-func @static_store(%static : memref<10x42xf32>, %i : index, %j : index, %val : f32) {
-//       CHECK:  %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-//  CHECK-NEXT:  %[[st0:.*]] = llvm.mlir.constant(42 : index) : i64
-//  CHECK-NEXT:  %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : i64
-//  CHECK-NEXT:  %[[off1:.*]] = llvm.add %[[offI]], %[[J]] : i64
-//  CHECK-NEXT:  %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-//  CHECK-NEXT:  llvm.store %{{.*}}, %[[addr]] : !llvm.ptr<f32>
-
-// BAREPTR:      %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-// BAREPTR-NEXT: %[[st0:.*]] = llvm.mlir.constant(42 : index) : i64
-// BAREPTR-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : i64
-// BAREPTR-NEXT: %[[off1:.*]] = llvm.add %[[offI]], %[[J]] : i64
-// BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-// BAREPTR-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm.ptr<f32>
-  memref.store %val, %static[%i, %j] : memref<10x42xf32>
-  return
-}
-
-// -----
-
-// CHECK-LABEL: func @static_memref_dim
-// BAREPTR-LABEL: func @static_memref_dim(%{{.*}}: !llvm.ptr<f32>) {
-func @static_memref_dim(%static : memref<42x32x15x13x27xf32>) {
-// CHECK:        llvm.mlir.constant(42 : index) : i64
-// BAREPTR:      llvm.insertvalue %{{.*}}, %{{.*}}[4, 4] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<5 x i64>, array<5 x i64>)>
-// BAREPTR: llvm.mlir.constant(42 : index) : i64
-  %c0 = constant 0 : index
-  %0 = memref.dim %static, %c0 : memref<42x32x15x13x27xf32>
-// CHECK:  llvm.mlir.constant(32 : index) : i64
-// BAREPTR:  llvm.mlir.constant(32 : index) : i64
-  %c1 = constant 1 : index
-  %1 = memref.dim %static, %c1 : memref<42x32x15x13x27xf32>
-// CHECK:  llvm.mlir.constant(15 : index) : i64
-// BAREPTR:  llvm.mlir.constant(15 : index) : i64
-  %c2 = constant 2 : index
-  %2 = memref.dim %static, %c2 : memref<42x32x15x13x27xf32>
-// CHECK:  llvm.mlir.constant(13 : index) : i64
-// BAREPTR:  llvm.mlir.constant(13 : index) : i64
-  %c3 = constant 3 : index
-  %3 = memref.dim %static, %c3 : memref<42x32x15x13x27xf32>
-// CHECK:  llvm.mlir.constant(27 : index) : i64
-// BAREPTR:  llvm.mlir.constant(27 : index) : i64
-  %c4 = constant 4 : index
-  %4 = memref.dim %static, %c4 : memref<42x32x15x13x27xf32>
-  return
-}
-
-// -----
-
-// BAREPTR: llvm.func @foo(!llvm.ptr<i8>) -> !llvm.ptr<i8>
-func private @foo(memref<10xi8>) -> memref<20xi8>
-
-// BAREPTR-LABEL: func @check_memref_func_call
-// BAREPTR-SAME:    %[[in:.*]]: !llvm.ptr<i8>) -> !llvm.ptr<i8>
-func @check_memref_func_call(%in : memref<10xi8>) -> memref<20xi8> {
-  // BAREPTR:         %[[inDesc:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 0]
-  // BAREPTR-NEXT:    %[[barePtr:.*]] = llvm.extractvalue %[[inDesc]][1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  // BAREPTR-NEXT:    %[[call:.*]] = llvm.call @foo(%[[barePtr]]) : (!llvm.ptr<i8>) -> !llvm.ptr<i8>
-  // BAREPTR-NEXT:    %[[desc0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  // BAREPTR-NEXT:    %[[desc1:.*]] = llvm.insertvalue %[[call]], %[[desc0]][0] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  // BAREPTR-NEXT:    %[[desc2:.*]] = llvm.insertvalue %[[call]], %[[desc1]][1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  // BAREPTR-NEXT:    %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
-  // BAREPTR-NEXT:    %[[desc4:.*]] = llvm.insertvalue %[[c0]], %[[desc2]][2] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  // BAREPTR-NEXT:    %[[c20:.*]] = llvm.mlir.constant(20 : index) : i64
-  // BAREPTR-NEXT:    %[[desc6:.*]] = llvm.insertvalue %[[c20]], %[[desc4]][3, 0] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  // BAREPTR-NEXT:    %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64
-  // BAREPTR-NEXT:    %[[outDesc:.*]] = llvm.insertvalue %[[c1]], %[[desc6]][4, 0] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  %res = call @foo(%in) : (memref<10xi8>) -> (memref<20xi8>)
-  // BAREPTR-NEXT:    %[[res:.*]] = llvm.extractvalue %[[outDesc]][1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  // BAREPTR-NEXT:    llvm.return %[[res]] : !llvm.ptr<i8>
-  return %res : memref<20xi8>
-}
-
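check_memref_func_call shows the difference between the two conventions: under
the default convention a ranked memref argument is expanded into all of its
descriptor fields, while under the bare-ptr convention only the aligned
pointer crosses the function boundary and the descriptor is rebuilt on the
other side from the statically known shape (hence the insertvalue chain with
the constants 0, 20 and 1 above). As C prototypes (hypothetical declarations,
not generated code):

    #include <stdint.h>
    /* Default convention for a memref<10xi8> argument: the descriptor is
       expanded into individual arguments. */
    int8_t *foo_default(int8_t *allocated, int8_t *aligned, int64_t offset,
                        int64_t size0, int64_t stride0);
    /* Bare-ptr convention: only the aligned pointer is passed. */
    int8_t *foo_bareptr(int8_t *aligned);
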
-// -----
-
-// BAREPTR: llvm.func @goo(f32) -> f32
-func private @goo(f32) -> f32
-
-// BAREPTR-LABEL: func @check_scalar_func_call
-// BAREPTR-SAME:    %[[in:.*]]: f32)
-func @check_scalar_func_call(%in : f32) {
-  // BAREPTR-NEXT:    %[[call:.*]] = llvm.call @goo(%[[in]]) : (f32) -> f32
-  %res = call @goo(%in) : (f32) -> (f32)
-  return
-}
-
-// -----
-
-// Unranked memrefs are currently not supported in the bare-ptr calling
-// convention. Check that the conversion to the LLVM dialect doesn't happen
-// in the presence of unranked memrefs when using such a calling convention.
-
-// BAREPTR: func private @hoo(memref<*xi8>) -> memref<*xi8>
-func private @hoo(memref<*xi8>) -> memref<*xi8>
-
-// BAREPTR-LABEL: func @check_unranked_memref_func_call(%{{.*}}: memref<*xi8>) -> memref<*xi8>
-func @check_unranked_memref_func_call(%in: memref<*xi8>) -> memref<*xi8> {
-  // BAREPTR-NEXT: call @hoo(%{{.*}}) : (memref<*xi8>) -> memref<*xi8>
-  %res = call @hoo(%in) : (memref<*xi8>) -> memref<*xi8>
-  // BAREPTR-NEXT: return %{{.*}} : memref<*xi8>
-  return %res : memref<*xi8>
-}
-
-// -----
-
-// Check that consistent types are emitted in address arithmetic in the
-// presence of a data layout specification.
-module attributes { dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32>> } {
-  func @address() {
-    %c1 = constant 1 : index
-    %0 = memref.alloc(%c1) : memref<? x vector<2xf32>>
-    // CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : index) : i32
-    // CHECK: llvm.mlir.null
-    // CHECK: llvm.getelementptr %{{.*}}[[CST]]
-    // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
-    // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
-    // CHECK: llvm.add %{{.*}} : i32
-    // CHECK: llvm.call @malloc(%{{.*}}) : (i32) -> !llvm.ptr
-    // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
-    // CHECK: llvm.sub {{.*}} : i32
-    // CHECK: llvm.add {{.*}} : i32
-    // CHECK: llvm.urem {{.*}} : i32
-    // CHECK: llvm.sub {{.*}} : i32
-    // CHECK: llvm.inttoptr %{{.*}} : i32 to !llvm.ptr
-    return
-  }
-}
-
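Because the dl_spec entry above sets the index width to 32 bits, every
index-typed value in this function lowers to i32 and the size arithmetic is
done in 32-bit integers. Roughly, as a C sketch (with the default 64-bit
index these would be uint64_t):

    #include <stdint.h>
    /* Allocation size for memref<? x vector<2xf32>> with a 32-bit index. */
    uint32_t alloc_bytes(uint32_t num_elems) {
      uint32_t elem_size = (uint32_t)sizeof(float[2]); /* null-gep, ptrtoint */
      return num_elems * elem_size;
    }
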
-// -----
-
-// Memrefs with unsupported element types should not be converted in either
-// calling convention.
-
-// CHECK: @unsupported_memref_element_type
-// CHECK-SAME: memref<
-// CHECK-NOT: !llvm.struct
-// BAREPTR: @unsupported_memref_element_type
-// BAREPTR-SAME: memref<
-// BAREPTR-NOT: !llvm.ptr
-func private @unsupported_memref_element_type() -> memref<42 x !test.memref_element>
-
-// CHECK: @unsupported_unranked_memref_element_type
-// CHECK-SAME: memref<
-// CHECK-NOT: !llvm.struct
-// BAREPTR: @unsupported_unranked_memref_element_type
-// BAREPTR-SAME: memref<
-// BAREPTR-NOT: !llvm.ptr
-func private @unsupported_unranked_memref_element_type() -> memref<* x !test.memref_element>
-

diff  --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
deleted file mode 100644
index ab74d125e93a1..0000000000000
--- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
+++ /dev/null
@@ -1,1455 +0,0 @@
-// RUN: mlir-opt -convert-std-to-llvm %s -split-input-file | FileCheck %s
-// RUN: mlir-opt -convert-std-to-llvm='index-bitwidth=32' %s -split-input-file | FileCheck --check-prefix=CHECK32 %s
-
-// CHECK-LABEL: func @empty() {
-// CHECK-NEXT:  llvm.return
-// CHECK-NEXT: }
-func @empty() {
-^bb0:
-  return
-}
-
-// CHECK-LABEL: llvm.func @body(i64)
-func private @body(index)
-
-// CHECK-LABEL: func @simple_loop() {
-// CHECK32-LABEL: func @simple_loop() {
-func @simple_loop() {
-^bb0:
-// CHECK-NEXT:  llvm.br ^bb1
-// CHECK32-NEXT:  llvm.br ^bb1
-  br ^bb1
-
-// CHECK-NEXT: ^bb1:	// pred: ^bb0
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i64
-// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
-// CHECK32-NEXT: ^bb1:	// pred: ^bb0
-// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i32
-// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i32
-// CHECK32-NEXT:  llvm.br ^bb2({{.*}} : i32)
-^bb1:	// pred: ^bb0
-  %c1 = constant 1 : index
-  %c42 = constant 42 : index
-  br ^bb2(%c1 : index)
-
-// CHECK:      ^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb3
-// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb4
-// CHECK32:      ^bb2({{.*}}: i32):	// 2 preds: ^bb1, ^bb3
-// CHECK32-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i32
-// CHECK32-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb4
-^bb2(%0: index):	// 2 preds: ^bb1, ^bb3
-  %1 = cmpi slt, %0, %c42 : index
-  cond_br %1, ^bb3, ^bb4
-
-// CHECK:      ^bb3:	// pred: ^bb2
-// CHECK-NEXT:  llvm.call @body({{.*}}) : (i64) -> ()
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
-// CHECK32:      ^bb3:	// pred: ^bb2
-// CHECK32-NEXT:  llvm.call @body({{.*}}) : (i32) -> ()
-// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i32
-// CHECK32-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i32
-// CHECK32-NEXT:  llvm.br ^bb2({{.*}} : i32)
-^bb3:	// pred: ^bb2
-  call @body(%0) : (index) -> ()
-  %c1_0 = constant 1 : index
-  %2 = addi %0, %c1_0 : index
-  br ^bb2(%2 : index)
-
-// CHECK:      ^bb4:	// pred: ^bb2
-// CHECK-NEXT:  llvm.return
-^bb4:	// pred: ^bb2
-  return
-}
-
-// CHECK-LABEL: func @simple_caller() {
-// CHECK-NEXT:  llvm.call @simple_loop() : () -> ()
-// CHECK-NEXT:  llvm.return
-// CHECK-NEXT: }
-func @simple_caller() {
-^bb0:
-  call @simple_loop() : () -> ()
-  return
-}
-
-// Check that function call attributes persist during conversion.
-// CHECK-LABEL: @call_with_attributes
-func @call_with_attributes() {
-  // CHECK: llvm.call @simple_loop() {baz = [1, 2, 3, 4], foo = "bar"} : () -> ()
-  call @simple_loop() {foo="bar", baz=[1,2,3,4]} : () -> ()
-  return
-}
-
-// CHECK-LABEL: func @ml_caller() {
-// CHECK-NEXT:  llvm.call @simple_loop() : () -> ()
-// CHECK-NEXT:  llvm.call @more_imperfectly_nested_loops() : () -> ()
-// CHECK-NEXT:  llvm.return
-// CHECK-NEXT: }
-func @ml_caller() {
-^bb0:
-  call @simple_loop() : () -> ()
-  call @more_imperfectly_nested_loops() : () -> ()
-  return
-}
-
-// CHECK-LABEL: llvm.func @body_args(i64) -> i64
-// CHECK32-LABEL: llvm.func @body_args(i32) -> i32
-func private @body_args(index) -> index
-// CHECK-LABEL: llvm.func @other(i64, i32) -> i32
-// CHECK32-LABEL: llvm.func @other(i32, i32) -> i32
-func private @other(index, i32) -> i32
-
-// CHECK-LABEL: func @func_args(%arg0: i32, %arg1: i32) -> i32 {
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : i32) : i32
-// CHECK-NEXT:  llvm.br ^bb1
-// CHECK32-LABEL: func @func_args(%arg0: i32, %arg1: i32) -> i32 {
-// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(0 : i32) : i32
-// CHECK32-NEXT:  llvm.br ^bb1
-func @func_args(i32, i32) -> i32 {
-^bb0(%arg0: i32, %arg1: i32):
-  %c0_i32 = constant 0 : i32
-  br ^bb1
-
-// CHECK-NEXT: ^bb1:	// pred: ^bb0
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i64
-// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
-// CHECK32-NEXT: ^bb1:	// pred: ^bb0
-// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i32
-// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i32
-// CHECK32-NEXT:  llvm.br ^bb2({{.*}} : i32)
-^bb1:	// pred: ^bb0
-  %c0 = constant 0 : index
-  %c42 = constant 42 : index
-  br ^bb2(%c0 : index)
-
-// CHECK-NEXT: ^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb3
-// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb4
-// CHECK32-NEXT: ^bb2({{.*}}: i32):	// 2 preds: ^bb1, ^bb3
-// CHECK32-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i32
-// CHECK32-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb4
-^bb2(%0: index):	// 2 preds: ^bb1, ^bb3
-  %1 = cmpi slt, %0, %c42 : index
-  cond_br %1, ^bb3, ^bb4
-
-// CHECK-NEXT: ^bb3:	// pred: ^bb2
-// CHECK-NEXT:  {{.*}} = llvm.call @body_args({{.*}}) : (i64) -> i64
-// CHECK-NEXT:  {{.*}} = llvm.call @other({{.*}}, %arg0) : (i64, i32) -> i32
-// CHECK-NEXT:  {{.*}} = llvm.call @other({{.*}}, {{.*}}) : (i64, i32) -> i32
-// CHECK-NEXT:  {{.*}} = llvm.call @other({{.*}}, %arg1) : (i64, i32) -> i32
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
-// CHECK32-NEXT: ^bb3:	// pred: ^bb2
-// CHECK32-NEXT:  {{.*}} = llvm.call @body_args({{.*}}) : (i32) -> i32
-// CHECK32-NEXT:  {{.*}} = llvm.call @other({{.*}}, %arg0) : (i32, i32) -> i32
-// CHECK32-NEXT:  {{.*}} = llvm.call @other({{.*}}, {{.*}}) : (i32, i32) -> i32
-// CHECK32-NEXT:  {{.*}} = llvm.call @other({{.*}}, %arg1) : (i32, i32) -> i32
-// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i32
-// CHECK32-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i32
-// CHECK32-NEXT:  llvm.br ^bb2({{.*}} : i32)
-^bb3:	// pred: ^bb2
-  %2 = call @body_args(%0) : (index) -> index
-  %3 = call @other(%2, %arg0) : (index, i32) -> i32
-  %4 = call @other(%2, %3) : (index, i32) -> i32
-  %5 = call @other(%2, %arg1) : (index, i32) -> i32
-  %c1 = constant 1 : index
-  %6 = addi %0, %c1 : index
-  br ^bb2(%6 : index)
-
-// CHECK-NEXT: ^bb4:	// pred: ^bb2
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.call @other({{.*}}, {{.*}}) : (i64, i32) -> i32
-// CHECK-NEXT:  llvm.return {{.*}} : i32
-// CHECK32-NEXT: ^bb4:	// pred: ^bb2
-// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i32
-// CHECK32-NEXT:  {{.*}} = llvm.call @other({{.*}}, {{.*}}) : (i32, i32) -> i32
-// CHECK32-NEXT:  llvm.return {{.*}} : i32
-^bb4:	// pred: ^bb2
-  %c0_0 = constant 0 : index
-  %7 = call @other(%c0_0, %c0_i32) : (index, i32) -> i32
-  return %7 : i32
-}
-
-// CHECK-LABEL: llvm.func @pre(i64)
-// CHECK32-LABEL: llvm.func @pre(i32)
-func private @pre(index)
-
-// CHECK-LABEL: llvm.func @body2(i64, i64)
-// CHECK32-LABEL: llvm.func @body2(i32, i32)
-func private @body2(index, index)
-
-// CHECK-LABEL: llvm.func @post(i64)
-// CHECK32-LABEL: llvm.func @post(i32)
-func private @post(index)
-
-// CHECK-LABEL: func @imperfectly_nested_loops() {
-// CHECK-NEXT:  llvm.br ^bb1
-func @imperfectly_nested_loops() {
-^bb0:
-  br ^bb1
-
-// CHECK-NEXT: ^bb1:	// pred: ^bb0
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i64
-// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
-^bb1:	// pred: ^bb0
-  %c0 = constant 0 : index
-  %c42 = constant 42 : index
-  br ^bb2(%c0 : index)
-
-// CHECK-NEXT: ^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb7
-// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb8
-^bb2(%0: index):	// 2 preds: ^bb1, ^bb7
-  %1 = cmpi slt, %0, %c42 : index
-  cond_br %1, ^bb3, ^bb8
-
-// CHECK-NEXT: ^bb3:
-// CHECK-NEXT:  llvm.call @pre({{.*}}) : (i64) -> ()
-// CHECK-NEXT:  llvm.br ^bb4
-^bb3:	// pred: ^bb2
-  call @pre(%0) : (index) -> ()
-  br ^bb4
-
-// CHECK-NEXT: ^bb4:	// pred: ^bb3
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(7 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(56 : index) : i64
-// CHECK-NEXT:  llvm.br ^bb5({{.*}} : i64)
-^bb4:	// pred: ^bb3
-  %c7 = constant 7 : index
-  %c56 = constant 56 : index
-  br ^bb5(%c7 : index)
-
-// CHECK-NEXT: ^bb5({{.*}}: i64):	// 2 preds: ^bb4, ^bb6
-// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb6, ^bb7
-^bb5(%2: index):	// 2 preds: ^bb4, ^bb6
-  %3 = cmpi slt, %2, %c56 : index
-  cond_br %3, ^bb6, ^bb7
-
-// CHECK-NEXT: ^bb6:	// pred: ^bb5
-// CHECK-NEXT:  llvm.call @body2({{.*}}, {{.*}}) : (i64, i64) -> ()
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(2 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.br ^bb5({{.*}} : i64)
-^bb6:	// pred: ^bb5
-  call @body2(%0, %2) : (index, index) -> ()
-  %c2 = constant 2 : index
-  %4 = addi %2, %c2 : index
-  br ^bb5(%4 : index)
-
-// CHECK-NEXT: ^bb7:	// pred: ^bb5
-// CHECK-NEXT:  llvm.call @post({{.*}}) : (i64) -> ()
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
-^bb7:	// pred: ^bb5
-  call @post(%0) : (index) -> ()
-  %c1 = constant 1 : index
-  %5 = addi %0, %c1 : index
-  br ^bb2(%5 : index)
-
-// CHECK-NEXT: ^bb8:	// pred: ^bb2
-// CHECK-NEXT:  llvm.return
-^bb8:	// pred: ^bb2
-  return
-}
-
-// CHECK-LABEL: llvm.func @mid(i64)
-func private @mid(index)
-
-// CHECK-LABEL: llvm.func @body3(i64, i64)
-func private @body3(index, index)
-
-// A complete function transformation check.
-// CHECK-LABEL: func @more_imperfectly_nested_loops() {
-// CHECK-NEXT:  llvm.br ^bb1
-// CHECK-NEXT:^bb1:	// pred: ^bb0
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i64
-// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
-// CHECK-NEXT:^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb11
-// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb12
-// CHECK-NEXT:^bb3:	// pred: ^bb2
-// CHECK-NEXT:  llvm.call @pre({{.*}}) : (i64) -> ()
-// CHECK-NEXT:  llvm.br ^bb4
-// CHECK-NEXT:^bb4:	// pred: ^bb3
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(7 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(56 : index) : i64
-// CHECK-NEXT:  llvm.br ^bb5({{.*}} : i64)
-// CHECK-NEXT:^bb5({{.*}}: i64):	// 2 preds: ^bb4, ^bb6
-// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb6, ^bb7
-// CHECK-NEXT:^bb6:	// pred: ^bb5
-// CHECK-NEXT:  llvm.call @body2({{.*}}, {{.*}}) : (i64, i64) -> ()
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(2 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.br ^bb5({{.*}} : i64)
-// CHECK-NEXT:^bb7:	// pred: ^bb5
-// CHECK-NEXT:  llvm.call @mid({{.*}}) : (i64) -> ()
-// CHECK-NEXT:  llvm.br ^bb8
-// CHECK-NEXT:^bb8:	// pred: ^bb7
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(18 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(37 : index) : i64
-// CHECK-NEXT:  llvm.br ^bb9({{.*}} : i64)
-// CHECK-NEXT:^bb9({{.*}}: i64):	// 2 preds: ^bb8, ^bb10
-// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb10, ^bb11
-// CHECK-NEXT:^bb10:	// pred: ^bb9
-// CHECK-NEXT:  llvm.call @body3({{.*}}, {{.*}}) : (i64, i64) -> ()
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(3 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.br ^bb9({{.*}} : i64)
-// CHECK-NEXT:^bb11:	// pred: ^bb9
-// CHECK-NEXT:  llvm.call @post({{.*}}) : (i64) -> ()
-// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
-// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
-// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
-// CHECK-NEXT:^bb12:	// pred: ^bb2
-// CHECK-NEXT:  llvm.return
-// CHECK-NEXT: }
-func @more_imperfectly_nested_loops() {
-^bb0:
-  br ^bb1
-^bb1:	// pred: ^bb0
-  %c0 = constant 0 : index
-  %c42 = constant 42 : index
-  br ^bb2(%c0 : index)
-^bb2(%0: index):	// 2 preds: ^bb1, ^bb11
-  %1 = cmpi slt, %0, %c42 : index
-  cond_br %1, ^bb3, ^bb12
-^bb3:	// pred: ^bb2
-  call @pre(%0) : (index) -> ()
-  br ^bb4
-^bb4:	// pred: ^bb3
-  %c7 = constant 7 : index
-  %c56 = constant 56 : index
-  br ^bb5(%c7 : index)
-^bb5(%2: index):	// 2 preds: ^bb4, ^bb6
-  %3 = cmpi slt, %2, %c56 : index
-  cond_br %3, ^bb6, ^bb7
-^bb6:	// pred: ^bb5
-  call @body2(%0, %2) : (index, index) -> ()
-  %c2 = constant 2 : index
-  %4 = addi %2, %c2 : index
-  br ^bb5(%4 : index)
-^bb7:	// pred: ^bb5
-  call @mid(%0) : (index) -> ()
-  br ^bb8
-^bb8:	// pred: ^bb7
-  %c18 = constant 18 : index
-  %c37 = constant 37 : index
-  br ^bb9(%c18 : index)
-^bb9(%5: index):	// 2 preds: ^bb8, ^bb10
-  %6 = cmpi slt, %5, %c37 : index
-  cond_br %6, ^bb10, ^bb11
-^bb10:	// pred: ^bb9
-  call @body3(%0, %5) : (index, index) -> ()
-  %c3 = constant 3 : index
-  %7 = addi %5, %c3 : index
-  br ^bb9(%7 : index)
-^bb11:	// pred: ^bb9
-  call @post(%0) : (index) -> ()
-  %c1 = constant 1 : index
-  %8 = addi %0, %c1 : index
-  br ^bb2(%8 : index)
-^bb12:	// pred: ^bb2
-  return
-}
-
-// CHECK-LABEL: llvm.func @get_i64() -> i64
-func private @get_i64() -> (i64)
-// CHECK-LABEL: llvm.func @get_f32() -> f32
-func private @get_f32() -> (f32)
-// CHECK-LABEL: llvm.func @get_c16() -> !llvm.struct<(f16, f16)>
-func private @get_c16() -> (complex<f16>)
-// CHECK-LABEL: llvm.func @get_c32() -> !llvm.struct<(f32, f32)>
-func private @get_c32() -> (complex<f32>)
-// CHECK-LABEL: llvm.func @get_c64() -> !llvm.struct<(f64, f64)>
-func private @get_c64() -> (complex<f64>)
-// CHECK-LABEL: llvm.func @get_memref() -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>
-// CHECK32-LABEL: llvm.func @get_memref() -> !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>
-func private @get_memref() -> (memref<42x?x10x?xf32>)
-
-// CHECK-LABEL: llvm.func @multireturn() -> !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)> {
-// CHECK32-LABEL: llvm.func @multireturn() -> !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)> {
-func @multireturn() -> (i64, f32, memref<42x?x10x?xf32>) {
-^bb0:
-// CHECK-NEXT:  {{.*}} = llvm.call @get_i64() : () -> i64
-// CHECK-NEXT:  {{.*}} = llvm.call @get_f32() : () -> f32
-// CHECK-NEXT:  {{.*}} = llvm.call @get_memref() : () -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>
-// CHECK32-NEXT:  {{.*}} = llvm.call @get_i64() : () -> i64
-// CHECK32-NEXT:  {{.*}} = llvm.call @get_f32() : () -> f32
-// CHECK32-NEXT:  {{.*}} = llvm.call @get_memref() : () -> !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>
-  %0 = call @get_i64() : () -> (i64)
-  %1 = call @get_f32() : () -> (f32)
-  %2 = call @get_memref() : () -> (memref<42x?x10x?xf32>)
-// CHECK-NEXT:  {{.*}} = llvm.mlir.undef : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
-// CHECK-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
-// CHECK-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
-// CHECK-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
-// CHECK-NEXT:  llvm.return {{.*}} : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
-// CHECK32-NEXT:  {{.*}} = llvm.mlir.undef : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
-// CHECK32-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
-// CHECK32-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
-// CHECK32-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
-// CHECK32-NEXT:  llvm.return {{.*}} : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
-  return %0, %1, %2 : i64, f32, memref<42x?x10x?xf32>
-}
-
-
-// CHECK-LABEL: llvm.func @multireturn_caller() {
-// CHECK32-LABEL: llvm.func @multireturn_caller() {
-func @multireturn_caller() {
-^bb0:
-// CHECK-NEXT:  {{.*}} = llvm.call @multireturn() : () -> !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
-// CHECK-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
-// CHECK-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
-// CHECK-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
-// CHECK32-NEXT:  {{.*}} = llvm.call @multireturn() : () -> !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
-// CHECK32-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
-// CHECK32-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
-// CHECK32-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
-  %0:3 = call @multireturn() : () -> (i64, f32, memref<42x?x10x?xf32>)
-  %1 = constant 42 : i64
-// CHECK:       {{.*}} = llvm.add {{.*}}, {{.*}} : i64
-  %2 = addi %0#0, %1 : i64
-  %3 = constant 42.0 : f32
-// CHECK:       {{.*}} = llvm.fadd {{.*}}, {{.*}} : f32
-  %4 = addf %0#1, %3 : f32
-  %5 = constant 0 : index
-  return
-}
-
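The multireturn pair above shows the ABI for multi-result functions: the
results are packed into an anonymous LLVM struct on return and unpacked with
llvm.extractvalue at the call site. A C analogue (struct and function names
are invented for illustration, and the memref member is elided):

    #include <stdint.h>
    struct Multireturn { int64_t a; float b; /* memref descriptor elided */ };
    struct Multireturn multireturn(void);
    void consume(void) {
      struct Multireturn r = multireturn(); /* llvm.call */
      int64_t x = r.a;                      /* llvm.extractvalue [0] */
      float y = r.b;                        /* llvm.extractvalue [1] */
      (void)x;
      (void)y;
    }
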
-// CHECK-LABEL: llvm.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> {
-func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> {
-// CHECK-NEXT:  %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : vector<4xf32>
-  %0 = constant dense<42.> : vector<4xf32>
-// CHECK-NEXT:  %1 = llvm.fadd %arg0, %0 : vector<4xf32>
-  %1 = addf %arg0, %0 : vector<4xf32>
-// CHECK-NEXT:  %2 = llvm.sdiv %arg2, %arg2 : vector<4xi64>
-  %3 = divi_signed %arg2, %arg2 : vector<4xi64>
-// CHECK-NEXT:  %3 = llvm.udiv %arg2, %arg2 : vector<4xi64>
-  %4 = divi_unsigned %arg2, %arg2 : vector<4xi64>
-// CHECK-NEXT:  %4 = llvm.srem %arg2, %arg2 : vector<4xi64>
-  %5 = remi_signed %arg2, %arg2 : vector<4xi64>
-// CHECK-NEXT:  %5 = llvm.urem %arg2, %arg2 : vector<4xi64>
-  %6 = remi_unsigned %arg2, %arg2 : vector<4xi64>
-// CHECK-NEXT:  %6 = llvm.fdiv %arg0, %0 : vector<4xf32>
-  %7 = divf %arg0, %0 : vector<4xf32>
-// CHECK-NEXT:  %7 = llvm.frem %arg0, %0 : vector<4xf32>
-  %8 = remf %arg0, %0 : vector<4xf32>
-// CHECK-NEXT:  %8 = llvm.and %arg2, %arg3 : vector<4xi64>
-  %9 = and %arg2, %arg3 : vector<4xi64>
-// CHECK-NEXT:  %9 = llvm.or %arg2, %arg3 : vector<4xi64>
-  %10 = or %arg2, %arg3 : vector<4xi64>
-// CHECK-NEXT:  %10 = llvm.xor %arg2, %arg3 : vector<4xi64>
-  %11 = xor %arg2, %arg3 : vector<4xi64>
-// CHECK-NEXT:  %11 = llvm.shl %arg2, %arg2 : vector<4xi64>
-  %12 = shift_left %arg2, %arg2 : vector<4xi64>
-// CHECK-NEXT:  %12 = llvm.ashr %arg2, %arg2 : vector<4xi64>
-  %13 = shift_right_signed %arg2, %arg2 : vector<4xi64>
-// CHECK-NEXT:  %13 = llvm.lshr %arg2, %arg2 : vector<4xi64>
-  %14 = shift_right_unsigned %arg2, %arg2 : vector<4xi64>
-  return %1 : vector<4xf32>
-}
-
-// CHECK-LABEL: @ops
-func @ops(f32, f32, i32, i32, f64) -> (f32, i32) {
-^bb0(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: i32, %arg4: f64):
-// CHECK-NEXT:  %0 = llvm.fsub %arg0, %arg1 : f32
-  %0 = subf %arg0, %arg1: f32
-// CHECK-NEXT:  %1 = llvm.sub %arg2, %arg3 : i32
-  %1 = subi %arg2, %arg3: i32
-// CHECK-NEXT:  %2 = llvm.icmp "slt" %arg2, %1 : i32
-  %2 = cmpi slt, %arg2, %1 : i32
-// CHECK-NEXT:  %3 = llvm.sdiv %arg2, %arg3 : i32
-  %3 = divi_signed %arg2, %arg3 : i32
-// CHECK-NEXT:  %4 = llvm.udiv %arg2, %arg3 : i32
-  %4 = divi_unsigned %arg2, %arg3 : i32
-// CHECK-NEXT:  %5 = llvm.srem %arg2, %arg3 : i32
-  %5 = remi_signed %arg2, %arg3 : i32
-// CHECK-NEXT:  %6 = llvm.urem %arg2, %arg3 : i32
-  %6 = remi_unsigned %arg2, %arg3 : i32
-// CHECK-NEXT:  %7 = llvm.select %2, %arg2, %arg3 : i1, i32
-  %7 = select %2, %arg2, %arg3 : i32
-// CHECK-NEXT:  %8 = llvm.fdiv %arg0, %arg1 : f32
-  %8 = divf %arg0, %arg1 : f32
-// CHECK-NEXT:  %9 = llvm.frem %arg0, %arg1 : f32
-  %9 = remf %arg0, %arg1 : f32
-// CHECK-NEXT: %10 = llvm.and %arg2, %arg3 : i32
-  %10 = and %arg2, %arg3 : i32
-// CHECK-NEXT: %11 = llvm.or %arg2, %arg3 : i32
-  %11 = or %arg2, %arg3 : i32
-// CHECK-NEXT: %12 = llvm.xor %arg2, %arg3 : i32
-  %12 = xor %arg2, %arg3 : i32
-// CHECK-NEXT: %13 = "llvm.intr.exp"(%arg0) : (f32) -> f32
-  %13 = math.exp %arg0 : f32
-// CHECK-NEXT: %14 = "llvm.intr.exp2"(%arg0) : (f32) -> f32
-  %14 = math.exp2 %arg0 : f32
-// CHECK-NEXT: %15 = llvm.mlir.constant(7.900000e-01 : f64) : f64
-  %15 = constant 7.9e-01 : f64
-// CHECK-NEXT: %16 = llvm.shl %arg2, %arg3 : i32
-  %16 = shift_left %arg2, %arg3 : i32
-// CHECK-NEXT: %17 = llvm.ashr %arg2, %arg3 : i32
-  %17 = shift_right_signed %arg2, %arg3 : i32
-// CHECK-NEXT: %18 = llvm.lshr %arg2, %arg3 : i32
-  %18 = shift_right_unsigned %arg2, %arg3 : i32
-// CHECK-NEXT: %{{[0-9]+}} = "llvm.intr.sqrt"(%arg0) : (f32) -> f32
-  %19 = math.sqrt %arg0 : f32
-// CHECK-NEXT: %{{[0-9]+}} = "llvm.intr.sqrt"(%arg4) : (f64) -> f64
-  %20 = math.sqrt %arg4 : f64
-  return %0, %4 : f32, i32
-}
-
-// Checking conversion of index types to integers using i1, assuming no target
-// system would have a 1-bit address space. Otherwise, we would have had to
-// make this test dependent on the target system's pointer size.
-// CHECK-LABEL: @index_cast
-func @index_cast(%arg0: index, %arg1: i1) {
-// CHECK-NEXT: = llvm.trunc %arg0 : i{{.*}} to i1
-  %0 = index_cast %arg0: index to i1
-// CHECK-NEXT: = llvm.sext %arg1 : i1 to i{{.*}}
-  %1 = index_cast %arg1: i1 to index
-  return
-}
-
-// CHECK-LABEL: @vector_index_cast
-func @vector_index_cast(%arg0: vector<2xindex>, %arg1: vector<2xi1>) {
-// CHECK-NEXT: = llvm.trunc %{{.*}} : vector<2xi{{.*}}> to vector<2xi1>
-  %0 = index_cast %arg0: vector<2xindex> to vector<2xi1>
-// CHECK-NEXT: = llvm.sext %{{.*}} : vector<2xi1> to vector<2xi{{.*}}>
-  %1 = index_cast %arg1: vector<2xi1> to vector<2xindex>
-  return
-}
-
-// Checking conversion of signed integer types to floating point.
-// CHECK-LABEL: @sitofp
-func @sitofp(%arg0 : i32, %arg1 : i64) {
-// CHECK-NEXT: = llvm.sitofp {{.*}} : i32 to f32
-  %0 = sitofp %arg0: i32 to f32
-// CHECK-NEXT: = llvm.sitofp {{.*}} : i32 to f64
-  %1 = sitofp %arg0: i32 to f64
-// CHECK-NEXT: = llvm.sitofp {{.*}} : i64 to f32
-  %2 = sitofp %arg1: i64 to f32
-// CHECK-NEXT: = llvm.sitofp {{.*}} : i64 to f64
-  %3 = sitofp %arg1: i64 to f64
-  return
-}
-
-// Checking conversion of integer vectors to floating point vector types.
-// CHECK-LABEL: @sitofp_vector
-func @sitofp_vector(%arg0 : vector<2xi16>, %arg1 : vector<2xi32>, %arg2 : vector<2xi64>) {
-// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi16> to vector<2xf32>
-  %0 = sitofp %arg0: vector<2xi16> to vector<2xf32>
-// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi16> to vector<2xf64>
-  %1 = sitofp %arg0: vector<2xi16> to vector<2xf64>
-// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi32> to vector<2xf32>
-  %2 = sitofp %arg1: vector<2xi32> to vector<2xf32>
-// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi32> to vector<2xf64>
-  %3 = sitofp %arg1: vector<2xi32> to vector<2xf64>
-// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi64> to vector<2xf32>
-  %4 = sitofp %arg2: vector<2xi64> to vector<2xf32>
-// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi64> to vector<2xf64>
-  %5 = sitofp %arg2: vector<2xi64> to vector<2xf64>
-  return
-}
-
-// Checking conversion of unsigned integer types to floating point.
-// CHECK-LABEL: @uitofp
-func @uitofp(%arg0 : i32, %arg1 : i64) {
-// CHECK-NEXT: = llvm.uitofp {{.*}} : i32 to f32
-  %0 = uitofp %arg0: i32 to f32
-// CHECK-NEXT: = llvm.uitofp {{.*}} : i32 to f64
-  %1 = uitofp %arg0: i32 to f64
-// CHECK-NEXT: = llvm.uitofp {{.*}} : i64 to f32
-  %2 = uitofp %arg1: i64 to f32
-// CHECK-NEXT: = llvm.uitofp {{.*}} : i64 to f64
-  %3 = uitofp %arg1: i64 to f64
-  return
-}
-
-// Checking extension of floating point types.
-// CHECK-LABEL: @fpext
-func @fpext(%arg0 : f16, %arg1 : f32) {
-// CHECK-NEXT: = llvm.fpext {{.*}} : f16 to f32
-  %0 = fpext %arg0: f16 to f32
-// CHECK-NEXT: = llvm.fpext {{.*}} : f16 to f64
-  %1 = fpext %arg0: f16 to f64
-// CHECK-NEXT: = llvm.fpext {{.*}} : f32 to f64
-  %2 = fpext %arg1: f32 to f64
-  return
-}
-
-// Checking extension of floating point vector types.
-// CHECK-LABEL: @fpext_vector
-func @fpext_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>) {
-// CHECK-NEXT: = llvm.fpext {{.*}} : vector<2xf16> to vector<2xf32>
-  %0 = fpext %arg0: vector<2xf16> to vector<2xf32>
-// CHECK-NEXT: = llvm.fpext {{.*}} : vector<2xf16> to vector<2xf64>
-  %1 = fpext %arg0: vector<2xf16> to vector<2xf64>
-// CHECK-NEXT: = llvm.fpext {{.*}} : vector<2xf32> to vector<2xf64>
-  %2 = fpext %arg1: vector<2xf32> to vector<2xf64>
-  return
-}
-
-// Checking conversion of floating point to integer types.
-// CHECK-LABEL: @fptosi
-func @fptosi(%arg0 : f32, %arg1 : f64) {
-// CHECK-NEXT: = llvm.fptosi {{.*}} : f32 to i32
-  %0 = fptosi %arg0: f32 to i32
-// CHECK-NEXT: = llvm.fptosi {{.*}} : f32 to i64
-  %1 = fptosi %arg0: f32 to i64
-// CHECK-NEXT: = llvm.fptosi {{.*}} : f64 to i32
-  %2 = fptosi %arg1: f64 to i32
-// CHECK-NEXT: = llvm.fptosi {{.*}} : f64 to i64
-  %3 = fptosi %arg1: f64 to i64
-  return
-}
-
-// Checking conversion of floating point vectors to integer vector types.
-// CHECK-LABEL: @fptosi_vector
-func @fptosi_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>, %arg2 : vector<2xf64>) {
-// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf16> to vector<2xi32>
-  %0 = fptosi %arg0: vector<2xf16> to vector<2xi32>
-// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf16> to vector<2xi64>
-  %1 = fptosi %arg0: vector<2xf16> to vector<2xi64>
-// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf32> to vector<2xi32>
-  %2 = fptosi %arg1: vector<2xf32> to vector<2xi32>
-// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf32> to vector<2xi64>
-  %3 = fptosi %arg1: vector<2xf32> to vector<2xi64>
-// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf64> to vector<2xi32>
-  %4 = fptosi %arg2: vector<2xf64> to vector<2xi32>
-// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf64> to vector<2xi64>
-  %5 = fptosi %arg2: vector<2xf64> to vector<2xi64>
-  return
-}
-
-// Checking conversion of floating point to integer types.
-// CHECK-LABEL: @fptoui
-func @fptoui(%arg0 : f32, %arg1 : f64) {
-// CHECK-NEXT: = llvm.fptoui {{.*}} : f32 to i32
-  %0 = fptoui %arg0: f32 to i32
-// CHECK-NEXT: = llvm.fptoui {{.*}} : f32 to i64
-  %1 = fptoui %arg0: f32 to i64
-// CHECK-NEXT: = llvm.fptoui {{.*}} : f64 to i32
-  %2 = fptoui %arg1: f64 to i32
-// CHECK-NEXT: = llvm.fptoui {{.*}} : f64 to i64
-  %3 = fptoui %arg1: f64 to i64
-  return
-}
-
-// Checking conversion of floating point vectors to integer vector types.
-// CHECK-LABEL: @fptoui_vector
-func @fptoui_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>, %arg2 : vector<2xf64>) {
-// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf16> to vector<2xi32>
-  %0 = fptoui %arg0: vector<2xf16> to vector<2xi32>
-// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf16> to vector<2xi64>
-  %1 = fptoui %arg0: vector<2xf16> to vector<2xi64>
-// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf32> to vector<2xi32>
-  %2 = fptoui %arg1: vector<2xf32> to vector<2xi32>
-// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf32> to vector<2xi64>
-  %3 = fptoui %arg1: vector<2xf32> to vector<2xi64>
-// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf64> to vector<2xi32>
-  %4 = fptoui %arg2: vector<2xf64> to vector<2xi32>
-// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf64> to vector<2xi64>
-  %5 = fptoui %arg2: vector<2xf64> to vector<2xi64>
-  return
-}
-
-// Checking conversion of integer vectors to floating point vector types.
-// CHECK-LABEL: @uitofp_vector
-func @uitofp_vector(%arg0 : vector<2xi16>, %arg1 : vector<2xi32>, %arg2 : vector<2xi64>) {
-// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi16> to vector<2xf32>
-  %0 = uitofp %arg0: vector<2xi16> to vector<2xf32>
-// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi16> to vector<2xf64>
-  %1 = uitofp %arg0: vector<2xi16> to vector<2xf64>
-// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi32> to vector<2xf32>
-  %2 = uitofp %arg1: vector<2xi32> to vector<2xf32>
-// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi32> to vector<2xf64>
-  %3 = uitofp %arg1: vector<2xi32> to vector<2xf64>
-// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi64> to vector<2xf32>
-  %4 = uitofp %arg2: vector<2xi64> to vector<2xf32>
-// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi64> to vector<2xf64>
-  %5 = uitofp %arg2: vector<2xi64> to vector<2xf64>
-  return
-}
-
-// Checking truncation of floating point types.
-// CHECK-LABEL: @fptrunc
-func @fptrunc(%arg0 : f32, %arg1 : f64) {
-// CHECK-NEXT: = llvm.fptrunc {{.*}} : f32 to f16
-  %0 = fptrunc %arg0: f32 to f16
-// CHECK-NEXT: = llvm.fptrunc {{.*}} : f64 to f16
-  %1 = fptrunc %arg1: f64 to f16
-// CHECK-NEXT: = llvm.fptrunc {{.*}} : f64 to f32
-  %2 = fptrunc %arg1: f64 to f32
-  return
-}
-
-// Checking truncation of floating point vector types.
-// CHECK-LABEL: @fptrunc_vector
-func @fptrunc_vector(%arg0 : vector<2xf32>, %arg1 : vector<2xf64>) {
-// CHECK-NEXT: = llvm.fptrunc {{.*}} : vector<2xf32> to vector<2xf16>
-  %0 = fptrunc %arg0: vector<2xf32> to vector<2xf16>
-// CHECK-NEXT: = llvm.fptrunc {{.*}} : vector<2xf64> to vector<2xf16>
-  %1 = fptrunc %arg1: vector<2xf64> to vector<2xf16>
-// CHECK-NEXT: = llvm.fptrunc {{.*}} : vector<2xf64> to vector<2xf32>
-  %2 = fptrunc %arg1: vector<2xf64> to vector<2xf32>
-  return
-}
-
-// Check sign and zero extension and truncation of integers.
-// CHECK-LABEL: @integer_extension_and_truncation
-func @integer_extension_and_truncation(%arg0 : i3) {
-// CHECK-NEXT: = llvm.sext %arg0 : i3 to i6
-  %0 = sexti %arg0 : i3 to i6
-// CHECK-NEXT: = llvm.zext %arg0 : i3 to i6
-  %1 = zexti %arg0 : i3 to i6
-// CHECK-NEXT: = llvm.trunc %arg0 : i3 to i2
-   %2 = trunci %arg0 : i3 to i2
-  return
-}
-
-// CHECK-LABEL: @dfs_block_order
-func @dfs_block_order(%arg0: i32) -> (i32) {
-// CHECK-NEXT:  %[[CST:.*]] = llvm.mlir.constant(42 : i32) : i32
-  %0 = constant 42 : i32
-// CHECK-NEXT:  llvm.br ^bb2
-  br ^bb2
-
-// CHECK-NEXT: ^bb1:
-// CHECK-NEXT:  %[[ADD:.*]] = llvm.add %arg0, %[[CST]] : i32
-// CHECK-NEXT:  llvm.return %[[ADD]] : i32
-^bb1:
-  %2 = addi %arg0, %0 : i32
-  return %2 : i32
-
-// CHECK-NEXT: ^bb2:
-^bb2:
-// CHECK-NEXT:  llvm.br ^bb1
-  br ^bb1
-}
-
-// CHECK-LABEL: func @fcmp(%arg0: f32, %arg1: f32) {
-func @fcmp(f32, f32) -> () {
-^bb0(%arg0: f32, %arg1: f32):
-  // CHECK:      llvm.fcmp "oeq" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "ogt" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "oge" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "olt" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "ole" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "one" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "ord" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "ueq" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "ugt" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "uge" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "ult" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "ule" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "une" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.fcmp "uno" %arg0, %arg1 : f32
-  // CHECK-NEXT: llvm.return
-  %1 = cmpf oeq, %arg0, %arg1 : f32
-  %2 = cmpf ogt, %arg0, %arg1 : f32
-  %3 = cmpf oge, %arg0, %arg1 : f32
-  %4 = cmpf olt, %arg0, %arg1 : f32
-  %5 = cmpf ole, %arg0, %arg1 : f32
-  %6 = cmpf one, %arg0, %arg1 : f32
-  %7 = cmpf ord, %arg0, %arg1 : f32
-  %8 = cmpf ueq, %arg0, %arg1 : f32
-  %9 = cmpf ugt, %arg0, %arg1 : f32
-  %10 = cmpf uge, %arg0, %arg1 : f32
-  %11 = cmpf ult, %arg0, %arg1 : f32
-  %12 = cmpf ule, %arg0, %arg1 : f32
-  %13 = cmpf une, %arg0, %arg1 : f32
-  %14 = cmpf uno, %arg0, %arg1 : f32
-
-  return
-}
-
-// CHECK-LABEL: @splat
-// CHECK-SAME: %[[A:arg[0-9]+]]: vector<4xf32>
-// CHECK-SAME: %[[ELT:arg[0-9]+]]: f32
-func @splat(%a: vector<4xf32>, %b: f32) -> vector<4xf32> {
-  %vb = splat %b : vector<4xf32>
-  %r = mulf %a, %vb : vector<4xf32>
-  return %r : vector<4xf32>
-}
-// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<4xf32>
-// CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32
-// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<4xf32>
-// CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0 : i32, 0 : i32, 0 : i32, 0 : i32]
-// CHECK-NEXT: %[[SCALE:[0-9]+]] = llvm.fmul %[[A]], %[[SPLAT]] : vector<4xf32>
-// CHECK-NEXT: llvm.return %[[SCALE]] : vector<4xf32>
-
-// CHECK-LABEL: func @view(
-// CHECK: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64, %[[ARG2:.*]]: i64
-func @view(%arg0 : index, %arg1 : index, %arg2 : index) {
-  // CHECK: llvm.mlir.constant(2048 : index) : i64
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  %0 = memref.alloc() : memref<2048xi8>
-
-  // Test two dynamic sizes.
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  // CHECK: %[[SHIFTED_BASE_PTR:.*]] = llvm.getelementptr %[[BASE_PTR]][%[[ARG2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
-  // CHECK: %[[CAST_SHIFTED_BASE_PTR:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR]] : !llvm.ptr<i8> to !llvm.ptr<f32>
-  // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK: llvm.insertvalue %[[C0]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(1 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[ARG0]], %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mul %{{.*}}, %[[ARG1]]
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  %1 = memref.view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref<?x?xf32>
-
-  // Test one dynamic size.
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BASE_PTR_2:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  // CHECK: %[[SHIFTED_BASE_PTR_2:.*]] = llvm.getelementptr %[[BASE_PTR_2]][%[[ARG2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
-  // CHECK: %[[CAST_SHIFTED_BASE_PTR_2:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_2]] : !llvm.ptr<i8> to !llvm.ptr<f32>
-  // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_2]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[C0_2:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK: llvm.insertvalue %[[C0_2]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(1 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(4 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mul %{{.*}}, %[[ARG1]]
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  %3 = memref.view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32>
-
-  // Test static sizes.
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BASE_PTR_3:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
-  // CHECK: %[[SHIFTED_BASE_PTR_3:.*]] = llvm.getelementptr %[[BASE_PTR_3]][%[[ARG2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
-  // CHECK: %[[CAST_SHIFTED_BASE_PTR_3:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_3]] : !llvm.ptr<i8> to !llvm.ptr<f32>
-  // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_3]], %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[C0_3:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK: llvm.insertvalue %[[C0_3]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(4 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(1 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(64 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(4 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  %5 = memref.view %0[%arg2][] : memref<2048xi8> to memref<64x4xf32>
-
-  // Test view memory space.
-  // CHECK: llvm.mlir.constant(2048 : index) : i64
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<i8, 4>, ptr<i8, 4>, i64, array<1 x i64>, array<1 x i64>)>
-  %6 = memref.alloc() : memref<2048xi8, 4>
-
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BASE_PTR_4:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i8, 4>, ptr<i8, 4>, i64, array<1 x i64>, array<1 x i64>)>
-  // CHECK: %[[SHIFTED_BASE_PTR_4:.*]] = llvm.getelementptr %[[BASE_PTR_4]][%[[ARG2]]] : (!llvm.ptr<i8, 4>, i64) -> !llvm.ptr<i8, 4>
-  // CHECK: %[[CAST_SHIFTED_BASE_PTR_4:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_4]] : !llvm.ptr<i8, 4> to !llvm.ptr<f32, 4>
-  // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_4]], %{{.*}}[1] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[C0_4:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK: llvm.insertvalue %[[C0_4]], %{{.*}}[2] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(4 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(1 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(64 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.mlir.constant(4 : index) : i64
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr<f32, 4>, ptr<f32, 4>, i64, array<2 x i64>, array<2 x i64>)>
-  %7 = memref.view %6[%arg2][] : memref<2048xi8, 4> to memref<64x4xf32, 4>
-
-  return
-}
-
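The CHECK lines above capture the core of the memref.view lowering: the i8 base pointer is pulled out of the source descriptor, advanced by the dynamic byte shift with a GEP, bitcast to the destination element type, and inserted into a fresh descriptor whose strides are built innermost-first as running products of the sizes. A minimal sketch of the pointer part (the %src, %byte_shift, and %size1 names are illustrative, not taken from the test):

  %base  = llvm.extractvalue %src[1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
  %shift = llvm.getelementptr %base[%byte_shift] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
  %elt   = llvm.bitcast %shift : !llvm.ptr<i8> to !llvm.ptr<f32>
  // Innermost stride is the constant 1; each outer stride multiplies in the next size.
  %c1      = llvm.mlir.constant(1 : index) : i64
  %stride0 = llvm.mul %c1, %size1 : i64
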
-// CHECK-LABEL: func @subview(
-// CHECK-COUNT-2: !llvm.ptr<f32>,
-// CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: i64,
-// CHECK:         %[[ARG0:[a-zA-Z0-9]*]]: i64,
-// CHECK:         %[[ARG1:[a-zA-Z0-9]*]]: i64,
-// CHECK:         %[[ARG2:.*]]: i64)
-// CHECK32-LABEL: func @subview(
-// CHECK32-COUNT-2: !llvm.ptr<f32>,
-// CHECK32-COUNT-5: {{%[a-zA-Z0-9]*}}: i32,
-// CHECK32:         %[[ARG0:[a-zA-Z0-9]*]]: i32,
-// CHECK32:         %[[ARG1:[a-zA-Z0-9]*]]: i32,
-// CHECK32:         %[[ARG2:.*]]: i32)
-func @subview(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg0 : index, %arg1 : index, %arg2 : index) {
-  // The last "insertvalue" that populates the memref descriptor from the function arguments.
-  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-
-  // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i64
-  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i64
-  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : i64
-  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i64
-  // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : i64
-  // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i64
-  // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i32
-  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i32
-  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : i32
-  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i32
-  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : i32
-  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i32
-  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-
-  %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
-    memref<64x4xf32, offset: 0, strides: [4, 1]>
-  to memref<?x?xf32, offset: ?, strides: [?, ?]>
-  return
-}
-
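All of the subview tests below check the same descriptor arithmetic, just with different mixes of static and dynamic operands: for every dimension i, the new offset accumulates offset_i * stride_i on top of the source offset, the new stride is step_i * stride_i, and the new size is simply the requested size. One dimension's contribution, reduced to its core (value names are illustrative):

  %offinc     = llvm.mul %offset_i, %stride_i : i64   // offset_i * stride_i
  %off_new    = llvm.add %off, %offinc : i64          // fold into the running offset
  %stride_new = llvm.mul %step_i, %stride_i : i64     // step_i * stride_i
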
-// CHECK-LABEL: func @subview_non_zero_addrspace(
-// CHECK-COUNT-2: !llvm.ptr<f32, 3>,
-// CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: i64,
-// CHECK:         %[[ARG0:[a-zA-Z0-9]*]]: i64,
-// CHECK:         %[[ARG1:[a-zA-Z0-9]*]]: i64,
-// CHECK:         %[[ARG2:.*]]: i64)
-// CHECK32-LABEL: func @subview_non_zero_addrspace(
-// CHECK32-COUNT-2: !llvm.ptr<f32, 3>,
-// CHECK32-COUNT-5: {{%[a-zA-Z0-9]*}}: i32,
-// CHECK32:         %[[ARG0:[a-zA-Z0-9]*]]: i32,
-// CHECK32:         %[[ARG1:[a-zA-Z0-9]*]]: i32,
-// CHECK32:         %[[ARG2:.*]]: i32)
-func @subview_non_zero_addrspace(%0 : memref<64x4xf32, offset: 0, strides: [4, 1], 3>, %arg0 : index, %arg1 : index, %arg2 : index) {
-  // The last "insertvalue" that populates the memref descriptor from the function arguments.
-  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-
-  // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32, 3> to !llvm.ptr<f32, 3>
-  // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32, 3> to !llvm.ptr<f32, 3>
-  // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i64
-  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i64
-  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : i64
-  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i64
-  // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : i64
-  // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i64
-  // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32, 3> to !llvm.ptr<f32, 3>
-  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32, 3> to !llvm.ptr<f32, 3>
-  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i32
-  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i32
-  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : i32
-  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i32
-  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : i32
-  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i32
-  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i32, array<2 x i32>, array<2 x i32>)>
-
-  %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
-    memref<64x4xf32, offset: 0, strides: [4, 1], 3>
-    to memref<?x?xf32, offset: ?, strides: [?, ?], 3>
-  return
-}
-
-// CHECK-LABEL: func @subview_const_size(
-// CHECK-SAME:         %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<f32>,
-// CHECK-SAME:         %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<f32>,
-// CHECK-SAME:         %[[ARG2:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG3:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG4:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG5:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG6:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG7:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG8:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG9:[a-zA-Z0-9]*]]: i64
-// CHECK32-LABEL: func @subview_const_size(
-// CHECK32-SAME:         %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<f32>,
-// CHECK32-SAME:         %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<f32>,
-// CHECK32-SAME:         %[[ARG2:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG3:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG4:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG5:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG6:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG7:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG8:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG9:[a-zA-Z0-9]*]]: i32
-func @subview_const_size(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg0 : index, %arg1 : index, %arg2 : index) {
-  // The last "insertvalue" that populates the memref descriptor from the function arguments.
-  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-
-  // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : i64
-  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i64
-  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : i64
-  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i64
-  // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
-  // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : i64
-  // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[CST2]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
-  // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : i64
-  // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[CST4]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : i32
-  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i32
-  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : i32
-  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i32
-  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
-  // CHECK32: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : i32
-  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[CST2]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
-  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : i32
-  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST4]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  %1 = memref.subview %0[%arg0, %arg1][4, 2][%arg0, %arg1] :
-    memref<64x4xf32, offset: 0, strides: [4, 1]>
-    to memref<4x2xf32, offset: ?, strides: [?, ?]>
-  return
-}
-
-// CHECK-LABEL: func @subview_const_stride(
-// CHECK-SAME:         %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<f32>,
-// CHECK-SAME:         %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<f32>,
-// CHECK-SAME:         %[[ARG2:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG3:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG4:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG5:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG6:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG7:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG8:[a-zA-Z0-9]*]]: i64
-// CHECK-SAME:         %[[ARG9:[a-zA-Z0-9]*]]: i64
-// CHECK32-LABEL: func @subview_const_stride(
-// CHECK32-SAME:         %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<f32>,
-// CHECK32-SAME:         %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<f32>,
-// CHECK32-SAME:         %[[ARG2:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG3:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG4:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG5:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG6:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG7:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG8:[a-zA-Z0-9]*]]: i32
-// CHECK32-SAME:         %[[ARG9:[a-zA-Z0-9]*]]: i32
-func @subview_const_stride(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg0 : index, %arg1 : index, %arg2 : index) {
-  // The last "insertvalue" that populates the memref descriptor from the function arguments.
-  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-
-  // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : i64
-  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i64
-  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : i64
-  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i64
-  // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
-  // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG8]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[CST2]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
-  // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG7]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : i32
-  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : i32
-  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : i32
-  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : i32
-  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
-  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG8]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST2]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
-  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG7]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  %1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][1, 2] :
-    memref<64x4xf32, offset: 0, strides: [4, 1]>
-    to memref<?x?xf32, offset: ?, strides: [4, 2]>
-  return
-}
-
-// CHECK-LABEL: func @subview_const_stride_and_offset(
-// CHECK32-LABEL: func @subview_const_stride_and_offset(
-func @subview_const_stride_and_offset(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>) {
-  // The last "insertvalue" that populates the memref descriptor from the function arguments.
-  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-
-  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[CST8:.*]] = llvm.mlir.constant(8 : index)
-  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[CST8]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[CST3:.*]] = llvm.mlir.constant(3 : i64)
-  // CHECK32: %[[CST1:.*]] = llvm.mlir.constant(1 : i64)
-  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[CST3]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[CST62:.*]] = llvm.mlir.constant(62 : i64)
-  // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
-  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  %1 = memref.subview %0[0, 8][62, 3][1, 1] :
-    memref<64x4xf32, offset: 0, strides: [4, 1]>
-    to memref<62x3xf32, offset: 8, strides: [4, 1]>
-  return
-}
-
-// CHECK-LABEL: func @subview_mixed_static_dynamic(
-// CHECK-COUNT-2: !llvm.ptr<f32>,
-// CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: i64,
-// CHECK:         %[[ARG0:[a-zA-Z0-9]*]]: i64,
-// CHECK:         %[[ARG1:[a-zA-Z0-9]*]]: i64,
-// CHECK:         %[[ARG2:.*]]: i64)
-// CHECK32-LABEL: func @subview_mixed_static_dynamic(
-// CHECK32-COUNT-2: !llvm.ptr<f32>,
-// CHECK32-COUNT-5: {{%[a-zA-Z0-9]*}}: i32,
-// CHECK32:         %[[ARG0:[a-zA-Z0-9]*]]: i32,
-// CHECK32:         %[[ARG1:[a-zA-Z0-9]*]]: i32,
-// CHECK32:         %[[ARG2:.*]]: i32)
-func @subview_mixed_static_dynamic(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg0 : index, %arg1 : index, %arg2 : index) {
-  // The last "insertvalue" that populates the memref descriptor from the function arguments.
-  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
-
-  // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<f32>
-  // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[OFFM1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE0]] : i32
-  // CHECK32: %[[OFFA1:.*]] = llvm.add %[[OFF]], %[[OFFM1]] : i32
-  // CHECK32: %[[CST8:.*]] = llvm.mlir.constant(8 : i64) : i32
-  // CHECK32: %[[OFFM2:.*]] = llvm.mul %[[CST8]], %[[STRIDE1]] : i32
-  // CHECK32: %[[OFFA2:.*]] = llvm.add %[[OFFA1]], %[[OFFM2]] : i32
-  // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFFA2]], %[[DESC1]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-
-  // CHECK32: %[[CST1:.*]] = llvm.mlir.constant(1 : i64) : i32
-  // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG2]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: %[[CST62:.*]] = llvm.mlir.constant(62 : i64) : i32
-  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : i32
-  // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<2 x i32>, array<2 x i32>)>
-  %1 = memref.subview %0[%arg1, 8][62, %arg2][%arg0, 1] :
-    memref<64x4xf32, offset: 0, strides: [4, 1]>
-    to memref<62x?xf32, offset: ?, strides: [?, 1]>
-  return
-}
-
-// CHECK-LABEL: func @subview_leading_operands(
-func @subview_leading_operands(%0 : memref<5x3xf32>, %1: memref<5x?xf32>) {
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Alloc ptr
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Aligned ptr
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Offset
-  // CHECK: %[[C6:.*]] = llvm.mlir.constant(6 : index) : i64
-  // CHECK: llvm.insertvalue %[[C6]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Sizes and strides @rank 1: both static.
-  // CHECK: %[[C3:.*]] = llvm.mlir.constant(3 : index) : i64
-  // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
-  // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[C1]], %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Sizes and strides @rank 0: both static, extracted from the type.
-  // CHECK: %[[C3_2:.*]] = llvm.mlir.constant(3 : i64) : i64
-  // CHECK: %[[C3_3:.*]] = llvm.mlir.constant(3 : i64) : i64
-  // CHECK: llvm.insertvalue %[[C3_2]], %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[C3_3]], %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  %2 = memref.subview %0[2][3][1]: memref<5x3xf32> to memref<3x3xf32, offset: 6, strides: [3, 1]>
-
-  return
-}
-
-// CHECK-LABEL: func @subview_leading_operands_dynamic(
-func @subview_leading_operands_dynamic(%0 : memref<5x?xf32>) {
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Alloc ptr
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Aligned ptr
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Extract strides
-  // CHECK: %[[ST0:.*]] = llvm.extractvalue %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[ST1:.*]] = llvm.extractvalue %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Compute and insert the new offset: the source offset plus 2 * stride at rank 0.
-  // CHECK: %[[OFF:.*]] = llvm.extractvalue %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64
-  // CHECK: %[[MUL:.*]] = llvm.mul %[[C2]], %[[ST0]] : i64
-  // CHECK: %[[NEW_OFF:.*]] = llvm.add %[[OFF]], %[[MUL]]  : i64
-  // CHECK: llvm.insertvalue %[[NEW_OFF]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Sizes and strides @rank 1: static stride 1, dynamic size unchanged from source memref.
-  // CHECK: %[[SZ1:.*]] = llvm.extractvalue %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
-  // CHECK: llvm.insertvalue %[[SZ1]], %{{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[C1]], %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Sizes and strides @rank 0: both static.
-  // CHECK: %[[C3:.*]] = llvm.mlir.constant(3 : i64) : i64
-  // CHECK: %[[C1_2:.*]] = llvm.mlir.constant(1 : i64) : i64
-  // CHECK: %[[MUL:.*]] = llvm.mul %[[C1_2]], %[[ST0]]  : i64
-  // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[MUL]], %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  %1 = memref.subview %0[2][3][1]: memref<5x?xf32> to memref<3x?xf32, offset: ?, strides: [?, 1]>
-  return
-}
-
-// CHECK-LABEL: func @subview_rank_reducing_leading_operands(
-func @subview_rank_reducing_leading_operands(%0 : memref<5x3xf32>) {
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Alloc ptr
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Aligned ptr
-  // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Extract strides
-  // CHECK: %[[ST0:.*]] = llvm.extractvalue %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[ST1:.*]] = llvm.extractvalue %{{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // Offset
-  // CHECK: %[[C3:.*]] = llvm.mlir.constant(3 : index) : i64
-  // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-  // Sizes and strides @rank 0: both static.
-  // CHECK: %[[C3:.*]] = llvm.mlir.constant(3 : index) : i64
-  // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
-  // CHECK: llvm.insertvalue %[[C3]], %{{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-  // CHECK: llvm.insertvalue %[[C1]], %{{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-  %1 = memref.subview %0[1][1][1]: memref<5x3xf32> to memref<3xf32, offset: 3, strides: [1]>
-
-  return
-}
-
-
-// -----
-
-// CHECK-LABEL: func @atomic_rmw
-func @atomic_rmw(%I : memref<10xi32>, %ival : i32, %F : memref<10xf32>, %fval : f32, %i : index) {
-  atomic_rmw assign %fval, %F[%i] : (f32, memref<10xf32>) -> f32
-  // CHECK: llvm.atomicrmw xchg %{{.*}}, %{{.*}} acq_rel
-  atomic_rmw addi %ival, %I[%i] : (i32, memref<10xi32>) -> i32
-  // CHECK: llvm.atomicrmw add %{{.*}}, %{{.*}} acq_rel
-  atomic_rmw maxs %ival, %I[%i] : (i32, memref<10xi32>) -> i32
-  // CHECK: llvm.atomicrmw max %{{.*}}, %{{.*}} acq_rel
-  atomic_rmw mins %ival, %I[%i] : (i32, memref<10xi32>) -> i32
-  // CHECK: llvm.atomicrmw min %{{.*}}, %{{.*}} acq_rel
-  atomic_rmw maxu %ival, %I[%i] : (i32, memref<10xi32>) -> i32
-  // CHECK: llvm.atomicrmw umax %{{.*}}, %{{.*}} acq_rel
-  atomic_rmw minu %ival, %I[%i] : (i32, memref<10xi32>) -> i32
-  // CHECK: llvm.atomicrmw umin %{{.*}}, %{{.*}} acq_rel
-  atomic_rmw addf %fval, %F[%i] : (f32, memref<10xf32>) -> f32
-  // CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} acq_rel
-  return
-}
-
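The test above pins down the one-to-one mapping from std.atomic_rmw kinds to llvm.atomicrmw kinds, all emitted with acq_rel ordering: assign becomes xchg, addi becomes add, maxs/mins become max/min, maxu/minu become umax/umin, and addf becomes fadd. For a single op the result looks like this (a sketch, assuming an element pointer %ptr already computed from the memref descriptor):

  %old = llvm.atomicrmw add %ptr, %ival acq_rel : i32
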
-// -----
-
-// CHECK-LABEL: func @generic_atomic_rmw
-func @generic_atomic_rmw(%I : memref<10xi32>, %i : index) -> i32 {
-  %x = generic_atomic_rmw %I[%i] : memref<10xi32> {
-    ^bb0(%old_value : i32):
-      %c1 = constant 1 : i32
-      atomic_yield %c1 : i32
-  }
-  // CHECK: [[init:%.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
-  // CHECK-NEXT: llvm.br ^bb1([[init]] : i32)
-  // CHECK-NEXT: ^bb1([[loaded:%.*]]: i32):
-  // CHECK-NEXT: [[c1:%.*]] = llvm.mlir.constant(1 : i32)
-  // CHECK-NEXT: [[pair:%.*]] = llvm.cmpxchg %{{.*}}, [[loaded]], [[c1]]
-  // CHECK-SAME:                    acq_rel monotonic : i32
-  // CHECK-NEXT: [[new:%.*]] = llvm.extractvalue [[pair]][0]
-  // CHECK-NEXT: [[ok:%.*]] = llvm.extractvalue [[pair]][1]
-  // CHECK-NEXT: llvm.cond_br [[ok]], ^bb2, ^bb1([[new]] : i32)
-  // CHECK-NEXT: ^bb2:
-  %c2 = constant 2 : i32
-  %add = addi %c2, %x : i32
-  return %add : i32
-  // CHECK-NEXT: [[c2:%.*]] = llvm.mlir.constant(2 : i32)
-  // CHECK-NEXT: [[add:%.*]] = llvm.add [[c2]], [[new]] : i32
-  // CHECK-NEXT: llvm.return [[add]]
-}
-
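generic_atomic_rmw has no single LLVM counterpart, so the lowering above expands it into a compare-exchange loop: load the initial value, run the region body on the currently observed value, try to llvm.cmpxchg the result in, and loop with the freshly observed value on failure. The control-flow skeleton, reduced from the CHECK lines (block names and %ptr/%new are illustrative):

  %init = llvm.load %ptr : !llvm.ptr<i32>
  llvm.br ^loop(%init : i32)
  ^loop(%loaded: i32):
    // ... the region body computes %new from %loaded ...
    %pair = llvm.cmpxchg %ptr, %loaded, %new acq_rel monotonic : i32
    %seen = llvm.extractvalue %pair[0] : !llvm.struct<(i32, i1)>
    %ok   = llvm.extractvalue %pair[1] : !llvm.struct<(i32, i1)>
    llvm.cond_br %ok, ^done, ^loop(%seen : i32)
  ^done:  // ... execution continues, with %seen as the op's result ...
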
-// -----
-
-// CHECK-LABEL: func @assume_alignment
-func @assume_alignment(%0 : memref<4x4xf16>) {
-  // CHECK: %[[PTR:.*]] = llvm.extractvalue %[[MEMREF:.*]][1] : !llvm.struct<(ptr<f16>, ptr<f16>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK-NEXT: %[[ZERO:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK-NEXT: %[[MASK:.*]] = llvm.mlir.constant(15 : index) : i64
-  // CHECK-NEXT: %[[INT:.*]] = llvm.ptrtoint %[[PTR]] : !llvm.ptr<f16> to i64
-  // CHECK-NEXT: %[[MASKED_PTR:.*]] = llvm.and %[[INT]], %[[MASK:.*]] : i64
-  // CHECK-NEXT: %[[CONDITION:.*]] = llvm.icmp "eq" %[[MASKED_PTR]], %[[ZERO]] : i64
-  // CHECK-NEXT: "llvm.intr.assume"(%[[CONDITION]]) : (i1) -> ()
-  memref.assume_alignment %0, 16 : memref<4x4xf16>
-  return
-}
-
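memref.assume_alignment has no direct LLVM operation either, so it lowers to an llvm.intr.assume over a bit test: convert the aligned pointer to an integer, mask it with alignment - 1 (15 for the 16-byte alignment here), and assert that the low bits are zero. Condensed (names illustrative):

  %int  = llvm.ptrtoint %ptr : !llvm.ptr<f16> to i64
  %mask = llvm.mlir.constant(15 : index) : i64   // alignment - 1
  %low  = llvm.and %int, %mask : i64
  %zero = llvm.mlir.constant(0 : index) : i64
  %ok   = llvm.icmp "eq" %low, %zero : i64
  "llvm.intr.assume"(%ok) : (i1) -> ()
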
-// -----
-
-// CHECK-LABEL: func @memref_index
-// CHECK-SAME: %arg0: !llvm.ptr<i64>, %arg1: !llvm.ptr<i64>,
-// CHECK-SAME: %arg2: i64, %arg3: i64, %arg4: i64)
-// CHECK-SAME: -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
-// CHECK32-LABEL: func @memref_index
-// CHECK32-SAME: %arg0: !llvm.ptr<i32>, %arg1: !llvm.ptr<i32>,
-// CHECK32-SAME: %arg2: i32, %arg3: i32, %arg4: i32)
-// CHECK32-SAME: -> !llvm.struct<(ptr<i32>, ptr<i32>, i32, array<1 x i32>, array<1 x i32>)>
-func @memref_index(%arg0: memref<32xindex>) -> memref<32xindex> {
-  return %arg0 : memref<32xindex>
-}
-
-// -----
-
-// CHECK-LABEL: func @rank_of_unranked
-// CHECK32-LABEL: func @rank_of_unranked
-func @rank_of_unranked(%unranked: memref<*xi32>) {
-  %rank = rank %unranked : memref<*xi32>
-  return
-}
-// CHECK-NEXT: llvm.mlir.undef
-// CHECK-NEXT: llvm.insertvalue
-// CHECK-NEXT: llvm.insertvalue
-// CHECK-NEXT: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(i64, ptr<i8>)>
-// CHECK32: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(i32, ptr<i8>)>
-
-// CHECK-LABEL: func @rank_of_ranked
-// CHECK32-LABEL: func @rank_of_ranked
-func @rank_of_ranked(%ranked: memref<?xi32>) {
-  %rank = rank %ranked : memref<?xi32>
-  return
-}
-// CHECK: llvm.mlir.constant(1 : index) : i64
-// CHECK32: llvm.mlir.constant(1 : index) : i32
-
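The two rank tests cover both cases: for a ranked memref the rank is known statically, so the op folds to a constant; for an unranked memref the rank travels at runtime in the first field of the unranked descriptor !llvm.struct<(i64, ptr<i8>)> and is read back with llvm.extractvalue. Side by side (a sketch; %udesc is illustrative):

  %r_ranked   = llvm.mlir.constant(1 : index) : i64
  %r_unranked = llvm.extractvalue %udesc[0] : !llvm.struct<(i64, ptr<i8>)>
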
-// -----
-
-// CHECK-LABEL: func @dim_of_unranked
-// CHECK32-LABEL: func @dim_of_unranked
-func @dim_of_unranked(%unranked: memref<*xi32>) -> index {
-  %c0 = constant 0 : index
-  %dim = memref.dim %unranked, %c0 : memref<*xi32>
-  return %dim : index
-}
-// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
-// CHECK-NEXT: llvm.insertvalue
-// CHECK-NEXT: %[[UNRANKED_DESC:.*]] = llvm.insertvalue
-// CHECK-NEXT: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
-
-// CHECK-NEXT: %[[RANKED_DESC:.*]] = llvm.extractvalue %[[UNRANKED_DESC]][1]
-// CHECK-SAME:   : !llvm.struct<(i64, ptr<i8>)>
-
-// CHECK-NEXT: %[[ZERO_D_DESC:.*]] = llvm.bitcast %[[RANKED_DESC]]
-// CHECK-SAME:   : !llvm.ptr<i8> to !llvm.ptr<struct<(ptr<i32>, ptr<i32>, i64)>>
-
-// CHECK-NEXT: %[[C2_i32:.*]] = llvm.mlir.constant(2 : i32) : i32
-// CHECK-NEXT: %[[C0_:.*]] = llvm.mlir.constant(0 : index) : i64
-
-// CHECK-NEXT: %[[OFFSET_PTR:.*]] = llvm.getelementptr %[[ZERO_D_DESC]]{{\[}}
-// CHECK-SAME:   %[[C0_]], %[[C2_i32]]] : (!llvm.ptr<struct<(ptr<i32>, ptr<i32>,
-// CHECK-SAME:   i64)>>, i64, i32) -> !llvm.ptr<i64>
-
-// CHECK-NEXT: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
-// CHECK-NEXT: %[[INDEX_INC:.*]] = llvm.add %[[C1]], %[[C0]] : i64
-
-// CHECK-NEXT: %[[SIZE_PTR:.*]] = llvm.getelementptr %[[OFFSET_PTR]]{{\[}}
-// CHECK-SAME:   %[[INDEX_INC]]] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
-
-// CHECK-NEXT: %[[SIZE:.*]] = llvm.load %[[SIZE_PTR]] : !llvm.ptr<i64>
-// CHECK-NEXT: llvm.return %[[SIZE]] : i64
-
-// CHECK32: %[[SIZE:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
-// CHECK32-NEXT: llvm.return %[[SIZE]] : i32
-
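The dim-of-unranked sequence works because the ranked descriptor has a fixed memory layout: two pointers, the i64 offset, then the sizes array. The lowering bitcasts the descriptor's ptr<i8> payload to a pointer to a zero-dimensional descriptor, GEPs to the offset field (struct index 2), and then steps 1 + dim i64 slots further to land on sizes[dim]. Condensed (a sketch; %payload and %dim are illustrative, %c0/%c1/%c2 are the constants 0 : i64, 1 : i64, and 2 : i32):

  %zerod = llvm.bitcast %payload : !llvm.ptr<i8> to !llvm.ptr<struct<(ptr<i32>, ptr<i32>, i64)>>
  %offp  = llvm.getelementptr %zerod[%c0, %c2] : (!llvm.ptr<struct<(ptr<i32>, ptr<i32>, i64)>>, i64, i32) -> !llvm.ptr<i64>
  %idx   = llvm.add %c1, %dim : i64    // sizes start one i64 slot past the offset
  %szp   = llvm.getelementptr %offp[%idx] : (!llvm.ptr<i64>, i64) -> !llvm.ptr<i64>
  %size  = llvm.load %szp : !llvm.ptr<i64>
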
-// -----
-
-func @switchLower(%arg0: i32, %arg1 : i32, %arg2 : i32) -> i32 {
-    switch %arg0 : i32, [
-      default: ^bb2,
-      115: ^bb1
-    ]
-  ^bb1:
-    llvm.return %arg1 : i32
-  ^bb2:
-    llvm.return %arg2 : i32
-}
-
-// CHECK: llvm.switch %arg0, ^[[bb2:.+]] [
-// CHECK-NEXT:      115: ^[[bb1:.+]]
-// CHECK-NEXT:    ]
-// CHECK:  ^[[bb1]]:
-// CHECK:    llvm.return %arg1 : i32
-// CHECK:  ^[[bb2]]:
-// CHECK:    llvm.return %arg2 : i32
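std.switch lowers directly onto llvm.switch: the default destination is listed first and each case value keeps its destination, as the CHECK lines confirm. In isolation (%flag and the block names are illustrative):

  llvm.switch %flag, ^default [
    115: ^case
  ]
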

diff  --git a/mlir/test/Conversion/StandardToLLVM/func-memref.mlir b/mlir/test/Conversion/StandardToLLVM/func-memref.mlir
new file mode 100644
index 0000000000000..b96c260d5a0f5
--- /dev/null
+++ b/mlir/test/Conversion/StandardToLLVM/func-memref.mlir
@@ -0,0 +1,184 @@
+// RUN: mlir-opt -convert-std-to-llvm -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -split-input-file %s | FileCheck %s --check-prefix=BAREPTR
+
+// BAREPTR-LABEL: func @check_noalias
+// BAREPTR-SAME: %{{.*}}: !llvm.ptr<f32> {llvm.noalias}, %{{.*}}: !llvm.ptr<f32> {llvm.noalias}
+func @check_noalias(%static : memref<2xf32> {llvm.noalias}, %other : memref<2xf32> {llvm.noalias}) {
+    return
+}
+
+// -----
+
+// CHECK-LABEL: func @check_strided_memref_arguments(
+// CHECK-COUNT-2: !llvm.ptr<f32>
+// CHECK-COUNT-5: i64
+// CHECK-COUNT-2: !llvm.ptr<f32>
+// CHECK-COUNT-5: i64
+// CHECK-COUNT-2: !llvm.ptr<f32>
+// CHECK-COUNT-5: i64
+func @check_strided_memref_arguments(%static: memref<10x20xf32, affine_map<(i,j)->(20 * i + j + 1)>>,
+                                     %dynamic : memref<?x?xf32, affine_map<(i,j)[M]->(M * i + j + 1)>>,
+                                     %mixed : memref<10x?xf32, affine_map<(i,j)[M]->(M * i + j + 1)>>) {
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @memref_index
+// CHECK-SAME: %arg0: !llvm.ptr<i64>, %arg1: !llvm.ptr<i64>,
+// CHECK-SAME: %arg2: i64, %arg3: i64, %arg4: i64)
+// CHECK-SAME: -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK32-LABEL: func @memref_index
+// CHECK32-SAME: %arg0: !llvm.ptr<i32>, %arg1: !llvm.ptr<i32>,
+// CHECK32-SAME: %arg2: i32, %arg3: i32, %arg4: i32)
+// CHECK32-SAME: -> !llvm.struct<(ptr<i32>, ptr<i32>, i32, array<1 x i32>, array<1 x i32>)>
+func @memref_index(%arg0: memref<32xindex>) -> memref<32xindex> {
+  return %arg0 : memref<32xindex>
+}
+
+// -----
+
+// CHECK-LABEL: func @check_arguments
+// CHECK-COUNT-2: !llvm.ptr<f32>
+// CHECK-COUNT-5: i64
+// CHECK-COUNT-2: !llvm.ptr<f32>
+// CHECK-COUNT-5: i64
+// CHECK-COUNT-2: !llvm.ptr<f32>
+// CHECK-COUNT-5: i64
+func @check_arguments(%static: memref<10x20xf32>, %dynamic : memref<?x?xf32>, %mixed : memref<10x?xf32>) {
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @check_static_return
+// CHECK-COUNT-2: !llvm.ptr<f32>
+// CHECK-COUNT-5: i64
+// CHECK-SAME: -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-LABEL: func @check_static_return
+// BAREPTR-SAME: (%[[arg:.*]]: !llvm.ptr<f32>) -> !llvm.ptr<f32> {
+func @check_static_return(%static : memref<32x18xf32>) -> memref<32x18xf32> {
+// CHECK:  llvm.return %{{.*}} : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+
+// BAREPTR: %[[udf:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[base0:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[val0:.*]] = llvm.mlir.constant(0 : index) : i64
+// BAREPTR-NEXT: %[[ins0:.*]] = llvm.insertvalue %[[val0]], %[[aligned]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[val1:.*]] = llvm.mlir.constant(32 : index) : i64
+// BAREPTR-NEXT: %[[ins1:.*]] = llvm.insertvalue %[[val1]], %[[ins0]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[val2:.*]] = llvm.mlir.constant(18 : index) : i64
+// BAREPTR-NEXT: %[[ins2:.*]] = llvm.insertvalue %[[val2]], %[[ins1]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[val3:.*]] = llvm.mlir.constant(18 : index) : i64
+// BAREPTR-NEXT: %[[ins3:.*]] = llvm.insertvalue %[[val3]], %[[ins2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[val4:.*]] = llvm.mlir.constant(1 : index) : i64
+// BAREPTR-NEXT: %[[ins4:.*]] = llvm.insertvalue %[[val4]], %[[ins3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[base1:.*]] = llvm.extractvalue %[[ins4]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: llvm.return %[[base1]] : !llvm.ptr<f32>
+  return %static : memref<32x18xf32>
+}
+
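Under the bare-ptr convention a statically-shaped memref argument arrives as a single !llvm.ptr<f32>, so the function prologue rebuilds the full descriptor locally: the incoming pointer fills both the allocated and aligned slots, and the offset, sizes, and strides are materialized as constants straight from the memref type. Returning a memref symmetrically collapses to returning just the aligned pointer (the final extractvalue above). The first steps, condensed (a sketch):

  %d0 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
  %d1 = llvm.insertvalue %arg, %d0[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
  %d2 = llvm.insertvalue %arg, %d1[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
  // ... then the constant offset, sizes, and strides are inserted at [2], [3, i], [4, i].
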
+// -----
+
+// CHECK-LABEL: func @check_static_return_with_offset
+// CHECK-COUNT-2: !llvm.ptr<f32>
+// CHECK-COUNT-5: i64
+// CHECK-SAME: -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-LABEL: func @check_static_return_with_offset
+// BAREPTR-SAME: (%[[arg:.*]]: !llvm.ptr<f32>) -> !llvm.ptr<f32> {
+func @check_static_return_with_offset(%static : memref<32x18xf32, offset:7, strides:[22,1]>) -> memref<32x18xf32, offset:7, strides:[22,1]> {
+// CHECK:  llvm.return %{{.*}} : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+
+// BAREPTR: %[[udf:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[base0:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base0]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[val0:.*]] = llvm.mlir.constant(7 : index) : i64
+// BAREPTR-NEXT: %[[ins0:.*]] = llvm.insertvalue %[[val0]], %[[aligned]][2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[val1:.*]] = llvm.mlir.constant(32 : index) : i64
+// BAREPTR-NEXT: %[[ins1:.*]] = llvm.insertvalue %[[val1]], %[[ins0]][3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[val2:.*]] = llvm.mlir.constant(22 : index) : i64
+// BAREPTR-NEXT: %[[ins2:.*]] = llvm.insertvalue %[[val2]], %[[ins1]][4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[val3:.*]] = llvm.mlir.constant(18 : index) : i64
+// BAREPTR-NEXT: %[[ins3:.*]] = llvm.insertvalue %[[val3]], %[[ins2]][3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[val4:.*]] = llvm.mlir.constant(1 : index) : i64
+// BAREPTR-NEXT: %[[ins4:.*]] = llvm.insertvalue %[[val4]], %[[ins3]][4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: %[[base1:.*]] = llvm.extractvalue %[[ins4]][1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
+// BAREPTR-NEXT: llvm.return %[[base1]] : !llvm.ptr<f32>
+  return %static : memref<32x18xf32, offset:7, strides:[22,1]>
+}
+
+// -----
+
+// BAREPTR: llvm.func @foo(!llvm.ptr<i8>) -> !llvm.ptr<i8>
+func private @foo(memref<10xi8>) -> memref<20xi8>
+
+// BAREPTR-LABEL: func @check_memref_func_call
+// BAREPTR-SAME:    %[[in:.*]]: !llvm.ptr<i8>) -> !llvm.ptr<i8>
+func @check_memref_func_call(%in : memref<10xi8>) -> memref<20xi8> {
+  // BAREPTR:         %[[inDesc:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 0]
+  // BAREPTR-NEXT:    %[[barePtr:.*]] = llvm.extractvalue %[[inDesc]][1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  // BAREPTR-NEXT:    %[[call:.*]] = llvm.call @foo(%[[barePtr]]) : (!llvm.ptr<i8>) -> !llvm.ptr<i8>
+  // BAREPTR-NEXT:    %[[desc0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  // BAREPTR-NEXT:    %[[desc1:.*]] = llvm.insertvalue %[[call]], %[[desc0]][0] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  // BAREPTR-NEXT:    %[[desc2:.*]] = llvm.insertvalue %[[call]], %[[desc1]][1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  // BAREPTR-NEXT:    %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
+  // BAREPTR-NEXT:    %[[desc4:.*]] = llvm.insertvalue %[[c0]], %[[desc2]][2] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  // BAREPTR-NEXT:    %[[c20:.*]] = llvm.mlir.constant(20 : index) : i64
+  // BAREPTR-NEXT:    %[[desc6:.*]] = llvm.insertvalue %[[c20]], %[[desc4]][3, 0] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  // BAREPTR-NEXT:    %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64
+  // BAREPTR-NEXT:    %[[outDesc:.*]] = llvm.insertvalue %[[c1]], %[[desc6]][4, 0] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  %res = call @foo(%in) : (memref<10xi8>) -> (memref<20xi8>)
+  // BAREPTR-NEXT:    %[[res:.*]] = llvm.extractvalue %[[outDesc]][1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+  // BAREPTR-NEXT:    llvm.return %[[res]] : !llvm.ptr<i8>
+  return %res : memref<20xi8>
+}
+
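The caller side is the mirror image: before the call the aligned pointer (field 1) is extracted from the argument's descriptor and passed bare, and after the call a fresh descriptor is wrapped around the returned pointer, with the shape constants coming from the declared result type (memref<20xi8> here). In short (a sketch; %indesc is illustrative):

  %bare = llvm.extractvalue %indesc[1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
  %ret  = llvm.call @foo(%bare) : (!llvm.ptr<i8>) -> !llvm.ptr<i8>
  // ... then the result descriptor is rebuilt around %ret as in the CHECK lines above.
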
+// -----
+
+// Unranked memrefs are currently not supported in the bare-ptr calling
+// convention. Check that signatures and calls involving them are left
+// unconverted to the LLVM dialect when that convention is requested.
+
+// BAREPTR: func private @hoo(memref<*xi8>) -> memref<*xi8>
+func private @hoo(memref<*xi8>) -> memref<*xi8>
+
+// BAREPTR-LABEL: func @check_unranked_memref_func_call(%{{.*}}: memref<*xi8>) -> memref<*xi8>
+func @check_unranked_memref_func_call(%in: memref<*xi8>) -> memref<*xi8> {
+  // BAREPTR-NEXT: call @hoo(%{{.*}}) : (memref<*xi8>) -> memref<*xi8>
+  %res = call @hoo(%in) : (memref<*xi8>) -> memref<*xi8>
+  // BAREPTR-NEXT: return %{{.*}} : memref<*xi8>
+  return %res : memref<*xi8>
+}
+
+// -----
+
+// Should not convert memrefs with unsupported types in any convention.
+
+// CHECK: @unsupported_memref_element_type
+// CHECK-SAME: memref<
+// CHECK-NOT: !llvm.struct
+// BAREPTR: @unsupported_memref_element_type
+// BAREPTR-SAME: memref<
+// BAREPTR-NOT: !llvm.ptr
+func private @unsupported_memref_element_type() -> memref<42 x !test.memref_element>
+
+// CHECK: @unsupported_unranked_memref_element_type
+// CHECK-SAME: memref<
+// CHECK-NOT: !llvm.struct
+// BAREPTR: @unsupported_unranked_memref_element_type
+// BAREPTR-SAME: memref<
+// BAREPTR-NOT: !llvm.ptr
+func private @unsupported_unranked_memref_element_type() -> memref<* x !test.memref_element>
+
+// -----
+
+// BAREPTR: llvm.func @goo(f32) -> f32
+func private @goo(f32) -> f32
+
+// BAREPTR-LABEL: func @check_scalar_func_call
+// BAREPTR-SAME:    %[[in:.*]]: f32)
+func @check_scalar_func_call(%in : f32) {
+  // BAREPTR-NEXT:    %[[call:.*]] = llvm.call @goo(%[[in]]) : (f32) -> f32
+  %res = call @goo(%in) : (f32) -> (f32)
+  return
+}

diff  --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
index a743e31971a3f..281fe0c2b24c2 100644
--- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
@@ -1,15 +1,865 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -convert-std-to-llvm -split-input-file -verify-diagnostics | FileCheck %s
-
-// CHECK-LABEL: func @address_space(
-// CHECK-SAME:    !llvm.ptr<f32, 7>
-func @address_space(%arg0 : memref<32xf32, affine_map<(d0) -> (d0)>, 7>) {
-  %0 = memref.alloc() : memref<32xf32, affine_map<(d0) -> (d0)>, 5>
-  %1 = constant 7 : index
-  // CHECK: llvm.load %{{.*}} : !llvm.ptr<f32, 5>
-  %2 = memref.load %0[%1] : memref<32xf32, affine_map<(d0) -> (d0)>, 5>
-  std.return
+// RUN: mlir-opt -convert-std-to-llvm %s -split-input-file | FileCheck %s
+// RUN: mlir-opt -convert-std-to-llvm='index-bitwidth=32' %s -split-input-file | FileCheck --check-prefix=CHECK32 %s
+
+// CHECK-LABEL: func @empty() {
+// CHECK-NEXT:  llvm.return
+// CHECK-NEXT: }
+func @empty() {
+^bb0:
+  return
+}
+
+// CHECK-LABEL: llvm.func @body(i64)
+func private @body(index)
+
+// CHECK-LABEL: func @simple_loop() {
+// CHECK32-LABEL: func @simple_loop() {
+func @simple_loop() {
+^bb0:
+// CHECK-NEXT:  llvm.br ^bb1
+// CHECK32-NEXT:  llvm.br ^bb1
+  br ^bb1
+
+// CHECK-NEXT: ^bb1:	// pred: ^bb0
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i64
+// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
+// CHECK32-NEXT: ^bb1:	// pred: ^bb0
+// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i32
+// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i32
+// CHECK32-NEXT:  llvm.br ^bb2({{.*}} : i32)
+^bb1:	// pred: ^bb0
+  %c1 = constant 1 : index
+  %c42 = constant 42 : index
+  br ^bb2(%c1 : index)
+
+// CHECK:      ^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb3
+// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb4
+// CHECK32:      ^bb2({{.*}}: i32):	// 2 preds: ^bb1, ^bb3
+// CHECK32-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i32
+// CHECK32-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb4
+^bb2(%0: index):	// 2 preds: ^bb1, ^bb3
+  %1 = cmpi slt, %0, %c42 : index
+  cond_br %1, ^bb3, ^bb4
+
+// CHECK:      ^bb3:	// pred: ^bb2
+// CHECK-NEXT:  llvm.call @body({{.*}}) : (i64) -> ()
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
+// CHECK32:      ^bb3:	// pred: ^bb2
+// CHECK32-NEXT:  llvm.call @body({{.*}}) : (i32) -> ()
+// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i32
+// CHECK32-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i32
+// CHECK32-NEXT:  llvm.br ^bb2({{.*}} : i32)
+^bb3:	// pred: ^bb2
+  call @body(%0) : (index) -> ()
+  %c1_0 = constant 1 : index
+  %2 = addi %0, %c1_0 : index
+  br ^bb2(%2 : index)
+
+// CHECK:      ^bb4:	// pred: ^bb2
+// CHECK-NEXT:  llvm.return
+^bb4:	// pred: ^bb2
+  return
+}
+
+// CHECK-LABEL: func @simple_caller() {
+// CHECK-NEXT:  llvm.call @simple_loop() : () -> ()
+// CHECK-NEXT:  llvm.return
+// CHECK-NEXT: }
+func @simple_caller() {
+^bb0:
+  call @simple_loop() : () -> ()
+  return
+}
+
+// Check that function call attributes persist during conversion.
+// CHECK-LABEL: @call_with_attributes
+func @call_with_attributes() {
+  // CHECK: llvm.call @simple_loop() {baz = [1, 2, 3, 4], foo = "bar"} : () -> ()
+  call @simple_loop() {foo="bar", baz=[1,2,3,4]} : () -> ()
+  return
+}
+
+// CHECK-LABEL: func @ml_caller() {
+// CHECK-NEXT:  llvm.call @simple_loop() : () -> ()
+// CHECK-NEXT:  llvm.call @more_imperfectly_nested_loops() : () -> ()
+// CHECK-NEXT:  llvm.return
+// CHECK-NEXT: }
+func @ml_caller() {
+^bb0:
+  call @simple_loop() : () -> ()
+  call @more_imperfectly_nested_loops() : () -> ()
+  return
+}
+
+// CHECK-LABEL: llvm.func @body_args(i64) -> i64
+// CHECK32-LABEL: llvm.func @body_args(i32) -> i32
+func private @body_args(index) -> index
+// CHECK-LABEL: llvm.func @other(i64, i32) -> i32
+// CHECK32-LABEL: llvm.func @other(i32, i32) -> i32
+func private @other(index, i32) -> i32
+
+// CHECK-LABEL: func @func_args(%arg0: i32, %arg1: i32) -> i32 {
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : i32) : i32
+// CHECK-NEXT:  llvm.br ^bb1
+// CHECK32-LABEL: func @func_args(%arg0: i32, %arg1: i32) -> i32 {
+// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(0 : i32) : i32
+// CHECK32-NEXT:  llvm.br ^bb1
+func @func_args(i32, i32) -> i32 {
+^bb0(%arg0: i32, %arg1: i32):
+  %c0_i32 = constant 0 : i32
+  br ^bb1
+
+// CHECK-NEXT: ^bb1:	// pred: ^bb0
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i64
+// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
+// CHECK32-NEXT: ^bb1:	// pred: ^bb0
+// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i32
+// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i32
+// CHECK32-NEXT:  llvm.br ^bb2({{.*}} : i32)
+^bb1:	// pred: ^bb0
+  %c0 = constant 0 : index
+  %c42 = constant 42 : index
+  br ^bb2(%c0 : index)
+
+// CHECK-NEXT: ^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb3
+// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb4
+// CHECK32-NEXT: ^bb2({{.*}}: i32):	// 2 preds: ^bb1, ^bb3
+// CHECK32-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i32
+// CHECK32-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb4
+^bb2(%0: index):	// 2 preds: ^bb1, ^bb3
+  %1 = cmpi slt, %0, %c42 : index
+  cond_br %1, ^bb3, ^bb4
+
+// CHECK-NEXT: ^bb3:	// pred: ^bb2
+// CHECK-NEXT:  {{.*}} = llvm.call @body_args({{.*}}) : (i64) -> i64
+// CHECK-NEXT:  {{.*}} = llvm.call @other({{.*}}, %arg0) : (i64, i32) -> i32
+// CHECK-NEXT:  {{.*}} = llvm.call @other({{.*}}, {{.*}}) : (i64, i32) -> i32
+// CHECK-NEXT:  {{.*}} = llvm.call @other({{.*}}, %arg1) : (i64, i32) -> i32
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
+// CHECK32-NEXT: ^bb3:	// pred: ^bb2
+// CHECK32-NEXT:  {{.*}} = llvm.call @body_args({{.*}}) : (i32) -> i32
+// CHECK32-NEXT:  {{.*}} = llvm.call @other({{.*}}, %arg0) : (i32, i32) -> i32
+// CHECK32-NEXT:  {{.*}} = llvm.call @other({{.*}}, {{.*}}) : (i32, i32) -> i32
+// CHECK32-NEXT:  {{.*}} = llvm.call @other({{.*}}, %arg1) : (i32, i32) -> i32
+// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i32
+// CHECK32-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i32
+// CHECK32-NEXT:  llvm.br ^bb2({{.*}} : i32)
+^bb3:	// pred: ^bb2
+  %2 = call @body_args(%0) : (index) -> index
+  %3 = call @other(%2, %arg0) : (index, i32) -> i32
+  %4 = call @other(%2, %3) : (index, i32) -> i32
+  %5 = call @other(%2, %arg1) : (index, i32) -> i32
+  %c1 = constant 1 : index
+  %6 = addi %0, %c1 : index
+  br ^bb2(%6 : index)
+
+// CHECK-NEXT: ^bb4:	// pred: ^bb2
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.call @other({{.*}}, {{.*}}) : (i64, i32) -> i32
+// CHECK-NEXT:  llvm.return {{.*}} : i32
+// CHECK32-NEXT: ^bb4:	// pred: ^bb2
+// CHECK32-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i32
+// CHECK32-NEXT:  {{.*}} = llvm.call @other({{.*}}, {{.*}}) : (i32, i32) -> i32
+// CHECK32-NEXT:  llvm.return {{.*}} : i32
+^bb4:	// pred: ^bb2
+  %c0_0 = constant 0 : index
+  %7 = call @other(%c0_0, %c0_i32) : (index, i32) -> i32
+  return %7 : i32
+}
+
+// CHECK-LABEL: llvm.func @pre(i64)
+// CHECK32-LABEL: llvm.func @pre(i32)
+func private @pre(index)
+
+// CHECK-LABEL: llvm.func @body2(i64, i64)
+// CHECK32-LABEL: llvm.func @body2(i32, i32)
+func private @body2(index, index)
+
+// CHECK-LABEL: llvm.func @post(i64)
+// CHECK32-LABEL: llvm.func @post(i32)
+func private @post(index)
+
+// CHECK-LABEL: func @imperfectly_nested_loops() {
+// CHECK-NEXT:  llvm.br ^bb1
+func @imperfectly_nested_loops() {
+^bb0:
+  br ^bb1
+
+// CHECK-NEXT: ^bb1:	// pred: ^bb0
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i64
+// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
+^bb1:	// pred: ^bb0
+  %c0 = constant 0 : index
+  %c42 = constant 42 : index
+  br ^bb2(%c0 : index)
+
+// CHECK-NEXT: ^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb7
+// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb8
+^bb2(%0: index):	// 2 preds: ^bb1, ^bb7
+  %1 = cmpi slt, %0, %c42 : index
+  cond_br %1, ^bb3, ^bb8
+
+// CHECK-NEXT: ^bb3:
+// CHECK-NEXT:  llvm.call @pre({{.*}}) : (i64) -> ()
+// CHECK-NEXT:  llvm.br ^bb4
+^bb3:	// pred: ^bb2
+  call @pre(%0) : (index) -> ()
+  br ^bb4
+
+// CHECK-NEXT: ^bb4:	// pred: ^bb3
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(7 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(56 : index) : i64
+// CHECK-NEXT:  llvm.br ^bb5({{.*}} : i64)
+^bb4:	// pred: ^bb3
+  %c7 = constant 7 : index
+  %c56 = constant 56 : index
+  br ^bb5(%c7 : index)
+
+// CHECK-NEXT: ^bb5({{.*}}: i64):	// 2 preds: ^bb4, ^bb6
+// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb6, ^bb7
+^bb5(%2: index):	// 2 preds: ^bb4, ^bb6
+  %3 = cmpi slt, %2, %c56 : index
+  cond_br %3, ^bb6, ^bb7
+
+// CHECK-NEXT: ^bb6:	// pred: ^bb5
+// CHECK-NEXT:  llvm.call @body2({{.*}}, {{.*}}) : (i64, i64) -> ()
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(2 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.br ^bb5({{.*}} : i64)
+^bb6:	// pred: ^bb5
+  call @body2(%0, %2) : (index, index) -> ()
+  %c2 = constant 2 : index
+  %4 = addi %2, %c2 : index
+  br ^bb5(%4 : index)
+
+// CHECK-NEXT: ^bb7:	// pred: ^bb5
+// CHECK-NEXT:  llvm.call @post({{.*}}) : (i64) -> ()
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
+^bb7:	// pred: ^bb5
+  call @post(%0) : (index) -> ()
+  %c1 = constant 1 : index
+  %5 = addi %0, %c1 : index
+  br ^bb2(%5 : index)
+
+// CHECK-NEXT: ^bb8:	// pred: ^bb2
+// CHECK-NEXT:  llvm.return
+^bb8:	// pred: ^bb2
+  return
+}
+
+// CHECK-LABEL: llvm.func @mid(i64)
+func private @mid(index)
+
+// CHECK-LABEL: llvm.func @body3(i64, i64)
+func private @body3(index, index)
+
+// A complete function transformation check.
+// CHECK-LABEL: func @more_imperfectly_nested_loops() {
+// CHECK-NEXT:  llvm.br ^bb1
+// CHECK-NEXT:^bb1:	// pred: ^bb0
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(0 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(42 : index) : i64
+// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
+// CHECK-NEXT:^bb2({{.*}}: i64):	// 2 preds: ^bb1, ^bb11
+// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb3, ^bb12
+// CHECK-NEXT:^bb3:	// pred: ^bb2
+// CHECK-NEXT:  llvm.call @pre({{.*}}) : (i64) -> ()
+// CHECK-NEXT:  llvm.br ^bb4
+// CHECK-NEXT:^bb4:	// pred: ^bb3
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(7 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(56 : index) : i64
+// CHECK-NEXT:  llvm.br ^bb5({{.*}} : i64)
+// CHECK-NEXT:^bb5({{.*}}: i64):	// 2 preds: ^bb4, ^bb6
+// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb6, ^bb7
+// CHECK-NEXT:^bb6:	// pred: ^bb5
+// CHECK-NEXT:  llvm.call @body2({{.*}}, {{.*}}) : (i64, i64) -> ()
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(2 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.br ^bb5({{.*}} : i64)
+// CHECK-NEXT:^bb7:	// pred: ^bb5
+// CHECK-NEXT:  llvm.call @mid({{.*}}) : (i64) -> ()
+// CHECK-NEXT:  llvm.br ^bb8
+// CHECK-NEXT:^bb8:	// pred: ^bb7
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(18 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(37 : index) : i64
+// CHECK-NEXT:  llvm.br ^bb9({{.*}} : i64)
+// CHECK-NEXT:^bb9({{.*}}: i64):	// 2 preds: ^bb8, ^bb10
+// CHECK-NEXT:  {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.cond_br {{.*}}, ^bb10, ^bb11
+// CHECK-NEXT:^bb10:	// pred: ^bb9
+// CHECK-NEXT:  llvm.call @body3({{.*}}, {{.*}}) : (i64, i64) -> ()
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(3 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.br ^bb9({{.*}} : i64)
+// CHECK-NEXT:^bb11:	// pred: ^bb9
+// CHECK-NEXT:  llvm.call @post({{.*}}) : (i64) -> ()
+// CHECK-NEXT:  {{.*}} = llvm.mlir.constant(1 : index) : i64
+// CHECK-NEXT:  {{.*}} = llvm.add {{.*}}, {{.*}} : i64
+// CHECK-NEXT:  llvm.br ^bb2({{.*}} : i64)
+// CHECK-NEXT:^bb12:	// pred: ^bb2
+// CHECK-NEXT:  llvm.return
+// CHECK-NEXT: }
+func @more_imperfectly_nested_loops() {
+^bb0:
+  br ^bb1
+^bb1:	// pred: ^bb0
+  %c0 = constant 0 : index
+  %c42 = constant 42 : index
+  br ^bb2(%c0 : index)
+^bb2(%0: index):	// 2 preds: ^bb1, ^bb11
+  %1 = cmpi slt, %0, %c42 : index
+  cond_br %1, ^bb3, ^bb12
+^bb3:	// pred: ^bb2
+  call @pre(%0) : (index) -> ()
+  br ^bb4
+^bb4:	// pred: ^bb3
+  %c7 = constant 7 : index
+  %c56 = constant 56 : index
+  br ^bb5(%c7 : index)
+^bb5(%2: index):	// 2 preds: ^bb4, ^bb6
+  %3 = cmpi slt, %2, %c56 : index
+  cond_br %3, ^bb6, ^bb7
+^bb6:	// pred: ^bb5
+  call @body2(%0, %2) : (index, index) -> ()
+  %c2 = constant 2 : index
+  %4 = addi %2, %c2 : index
+  br ^bb5(%4 : index)
+^bb7:	// pred: ^bb5
+  call @mid(%0) : (index) -> ()
+  br ^bb8
+^bb8:	// pred: ^bb7
+  %c18 = constant 18 : index
+  %c37 = constant 37 : index
+  br ^bb9(%c18 : index)
+^bb9(%5: index):	// 2 preds: ^bb8, ^bb10
+  %6 = cmpi slt, %5, %c37 : index
+  cond_br %6, ^bb10, ^bb11
+^bb10:	// pred: ^bb9
+  call @body3(%0, %5) : (index, index) -> ()
+  %c3 = constant 3 : index
+  %7 = addi %5, %c3 : index
+  br ^bb9(%7 : index)
+^bb11:	// pred: ^bb9
+  call @post(%0) : (index) -> ()
+  %c1 = constant 1 : index
+  %8 = addi %0, %c1 : index
+  br ^bb2(%8 : index)
+^bb12:	// pred: ^bb2
+  return
+}
+
+// CHECK-LABEL: llvm.func @get_i64() -> i64
+func private @get_i64() -> (i64)
+// CHECK-LABEL: llvm.func @get_f32() -> f32
+func private @get_f32() -> (f32)
+// CHECK-LABEL: llvm.func @get_c16() -> !llvm.struct<(f16, f16)>
+func private @get_c16() -> (complex<f16>)
+// CHECK-LABEL: llvm.func @get_c32() -> !llvm.struct<(f32, f32)>
+func private @get_c32() -> (complex<f32>)
+// CHECK-LABEL: llvm.func @get_c64() -> !llvm.struct<(f64, f64)>
+func private @get_c64() -> (complex<f64>)
+// CHECK-LABEL: llvm.func @get_memref() -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>
+// CHECK32-LABEL: llvm.func @get_memref() -> !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>
+func private @get_memref() -> (memref<42x?x10x?xf32>)
+
+// CHECK-LABEL: llvm.func @multireturn() -> !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)> {
+// CHECK32-LABEL: llvm.func @multireturn() -> !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)> {
+func @multireturn() -> (i64, f32, memref<42x?x10x?xf32>) {
+^bb0:
+// CHECK-NEXT:  {{.*}} = llvm.call @get_i64() : () -> i64
+// CHECK-NEXT:  {{.*}} = llvm.call @get_f32() : () -> f32
+// CHECK-NEXT:  {{.*}} = llvm.call @get_memref() : () -> !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>
+// CHECK32-NEXT:  {{.*}} = llvm.call @get_i64() : () -> i64
+// CHECK32-NEXT:  {{.*}} = llvm.call @get_f32() : () -> f32
+// CHECK32-NEXT:  {{.*}} = llvm.call @get_memref() : () -> !llvm.struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>
+  %0 = call @get_i64() : () -> (i64)
+  %1 = call @get_f32() : () -> (f32)
+  %2 = call @get_memref() : () -> (memref<42x?x10x?xf32>)
+// CHECK-NEXT:  {{.*}} = llvm.mlir.undef : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
+// CHECK-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
+// CHECK-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
+// CHECK-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
+// CHECK-NEXT:  llvm.return {{.*}} : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
+// CHECK32-NEXT:  {{.*}} = llvm.mlir.undef : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
+// CHECK32-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
+// CHECK32-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
+// CHECK32-NEXT:  {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
+// CHECK32-NEXT:  llvm.return {{.*}} : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
+  return %0, %1, %2 : i64, f32, memref<42x?x10x?xf32>
+}
+
+// CHECK-LABEL: llvm.func @multireturn_caller() {
+// CHECK32-LABEL: llvm.func @multireturn_caller() {
+func @multireturn_caller() {
+^bb0:
+// CHECK-NEXT:  {{.*}} = llvm.call @multireturn() : () -> !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
+// CHECK-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
+// CHECK-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
+// CHECK-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i64, array<4 x i64>, array<4 x i64>)>)>
+// CHECK32-NEXT:  {{.*}} = llvm.call @multireturn() : () -> !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
+// CHECK32-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
+// CHECK32-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
+// CHECK32-NEXT:  {{.*}} = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i64, f32, struct<(ptr<f32>, ptr<f32>, i32, array<4 x i32>, array<4 x i32>)>)>
+  %0:3 = call @multireturn() : () -> (i64, f32, memref<42x?x10x?xf32>)
+  %1 = constant 42 : i64
+// CHECK:       {{.*}} = llvm.add {{.*}}, {{.*}} : i64
+  %2 = addi %0#0, %1 : i64
+  %3 = constant 42.0 : f32
+// CHECK:       {{.*}} = llvm.fadd {{.*}}, {{.*}} : f32
+  %4 = addf %0#1, %3 : f32
+  %5 = constant 0 : index
+  return
+}
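
As the CHECK lines above show, a function with several results lowers to an LLVM function returning one literal struct, built with llvm.mlir.undef plus llvm.insertvalue; the caller then unpacks each result with llvm.extractvalue. A minimal sketch with a hypothetical two-result function:

    func @pair() -> (i64, f32)
    // lowers to:
    llvm.func @pair() -> !llvm.struct<(i64, f32)>
    // and a caller retrieves result #1 as:
    %s = llvm.call @pair() : () -> !llvm.struct<(i64, f32)>
    %f = llvm.extractvalue %s[1] : !llvm.struct<(i64, f32)>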
+
+// CHECK-LABEL: llvm.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> {
+func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> {
+// CHECK-NEXT:  %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : vector<4xf32>
+  %0 = constant dense<42.> : vector<4xf32>
+// CHECK-NEXT:  %1 = llvm.fadd %arg0, %0 : vector<4xf32>
+  %1 = addf %arg0, %0 : vector<4xf32>
+// CHECK-NEXT:  %2 = llvm.sdiv %arg2, %arg2 : vector<4xi64>
+  %3 = divi_signed %arg2, %arg2 : vector<4xi64>
+// CHECK-NEXT:  %3 = llvm.udiv %arg2, %arg2 : vector<4xi64>
+  %4 = divi_unsigned %arg2, %arg2 : vector<4xi64>
+// CHECK-NEXT:  %4 = llvm.srem %arg2, %arg2 : vector<4xi64>
+  %5 = remi_signed %arg2, %arg2 : vector<4xi64>
+// CHECK-NEXT:  %5 = llvm.urem %arg2, %arg2 : vector<4xi64>
+  %6 = remi_unsigned %arg2, %arg2 : vector<4xi64>
+// CHECK-NEXT:  %6 = llvm.fdiv %arg0, %0 : vector<4xf32>
+  %7 = divf %arg0, %0 : vector<4xf32>
+// CHECK-NEXT:  %7 = llvm.frem %arg0, %0 : vector<4xf32>
+  %8 = remf %arg0, %0 : vector<4xf32>
+// CHECK-NEXT:  %8 = llvm.and %arg2, %arg3 : vector<4xi64>
+  %9 = and %arg2, %arg3 : vector<4xi64>
+// CHECK-NEXT:  %9 = llvm.or %arg2, %arg3 : vector<4xi64>
+  %10 = or %arg2, %arg3 : vector<4xi64>
+// CHECK-NEXT:  %10 = llvm.xor %arg2, %arg3 : vector<4xi64>
+  %11 = xor %arg2, %arg3 : vector<4xi64>
+// CHECK-NEXT:  %11 = llvm.shl %arg2, %arg2 : vector<4xi64>
+  %12 = shift_left %arg2, %arg2 : vector<4xi64>
+// CHECK-NEXT:  %12 = llvm.ashr %arg2, %arg2 : vector<4xi64>
+  %13 = shift_right_signed %arg2, %arg2 : vector<4xi64>
+// CHECK-NEXT:  %13 = llvm.lshr %arg2, %arg2 : vector<4xi64>
+  %14 = shift_right_unsigned %arg2, %arg2 : vector<4xi64>
+  return %1 : vector<4xf32>
+}
+
+// CHECK-LABEL: @ops
+func @ops(f32, f32, i32, i32, f64) -> (f32, i32) {
+^bb0(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: i32, %arg4: f64):
+// CHECK-NEXT:  %0 = llvm.fsub %arg0, %arg1 : f32
+  %0 = subf %arg0, %arg1: f32
+// CHECK-NEXT:  %1 = llvm.sub %arg2, %arg3 : i32
+  %1 = subi %arg2, %arg3: i32
+// CHECK-NEXT:  %2 = llvm.icmp "slt" %arg2, %1 : i32
+  %2 = cmpi slt, %arg2, %1 : i32
+// CHECK-NEXT:  %3 = llvm.sdiv %arg2, %arg3 : i32
+  %3 = divi_signed %arg2, %arg3 : i32
+// CHECK-NEXT:  %4 = llvm.udiv %arg2, %arg3 : i32
+  %4 = divi_unsigned %arg2, %arg3 : i32
+// CHECK-NEXT:  %5 = llvm.srem %arg2, %arg3 : i32
+  %5 = remi_signed %arg2, %arg3 : i32
+// CHECK-NEXT:  %6 = llvm.urem %arg2, %arg3 : i32
+  %6 = remi_unsigned %arg2, %arg3 : i32
+// CHECK-NEXT:  %7 = llvm.select %2, %arg2, %arg3 : i1, i32
+  %7 = select %2, %arg2, %arg3 : i32
+// CHECK-NEXT:  %8 = llvm.fdiv %arg0, %arg1 : f32
+  %8 = divf %arg0, %arg1 : f32
+// CHECK-NEXT:  %9 = llvm.frem %arg0, %arg1 : f32
+  %9 = remf %arg0, %arg1 : f32
+// CHECK-NEXT: %10 = llvm.and %arg2, %arg3 : i32
+  %10 = and %arg2, %arg3 : i32
+// CHECK-NEXT: %11 = llvm.or %arg2, %arg3 : i32
+  %11 = or %arg2, %arg3 : i32
+// CHECK-NEXT: %12 = llvm.xor %arg2, %arg3 : i32
+  %12 = xor %arg2, %arg3 : i32
+// CHECK-NEXT: %13 = "llvm.intr.exp"(%arg0) : (f32) -> f32
+  %13 = math.exp %arg0 : f32
+// CHECK-NEXT: %14 = "llvm.intr.exp2"(%arg0) : (f32) -> f32
+  %14 = math.exp2 %arg0 : f32
+// CHECK-NEXT: %15 = llvm.mlir.constant(7.900000e-01 : f64) : f64
+  %15 = constant 7.9e-01 : f64
+// CHECK-NEXT: %16 = llvm.shl %arg2, %arg3 : i32
+  %16 = shift_left %arg2, %arg3 : i32
+// CHECK-NEXT: %17 = llvm.ashr %arg2, %arg3 : i32
+  %17 = shift_right_signed %arg2, %arg3 : i32
+// CHECK-NEXT: %18 = llvm.lshr %arg2, %arg3 : i32
+  %18 = shift_right_unsigned %arg2, %arg3 : i32
+// CHECK-NEXT: %{{[0-9]+}} = "llvm.intr.sqrt"(%arg0) : (f32) -> f32
+  %19 = math.sqrt %arg0 : f32
+// CHECK-NEXT: %{{[0-9]+}} = "llvm.intr.sqrt"(%arg4) : (f64) -> f64
+  %20 = math.sqrt %arg4 : f64
+  return %0, %4 : f32, i32
+}
+
+// Checking conversion of index types to integers using i1, assuming no target
+// system would have a 1-bit address space.  Otherwise, we would have had to
+// make this test dependent on the pointer size on the target system.
+// CHECK-LABEL: @index_cast
+func @index_cast(%arg0: index, %arg1: i1) {
+// CHECK-NEXT: = llvm.trunc %arg0 : i{{.*}} to i1
+  %0 = index_cast %arg0: index to i1
+// CHECK-NEXT: = llvm.sext %arg1 : i1 to i{{.*}}
+  %1 = index_cast %arg1: i1 to index
+  return
+}
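
index_cast is direction-sensitive: the lowering truncates when the target type is narrower than the index width and sign-extends when it is wider. The CHECK lines use i{{.*}} so the same test passes at either index width; with a 32-bit index the pair above would come out as (a sketch, not a CHECK in this test):

    %0 = llvm.trunc %arg0 : i32 to i1
    %1 = llvm.sext %arg1 : i1 to i32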
+
+// CHECK-LABEL: @vector_index_cast
+func @vector_index_cast(%arg0: vector<2xindex>, %arg1: vector<2xi1>) {
+// CHECK-NEXT: = llvm.trunc %{{.*}} : vector<2xi{{.*}}> to vector<2xi1>
+  %0 = index_cast %arg0: vector<2xindex> to vector<2xi1>
+// CHECK-NEXT: = llvm.sext %{{.*}} : vector<2xi1> to vector<2xi{{.*}}>
+  %1 = index_cast %arg1: vector<2xi1> to vector<2xindex>
+  return
+}
+
+// Checking conversion of signed integer types to floating point.
+// CHECK-LABEL: @sitofp
+func @sitofp(%arg0 : i32, %arg1 : i64) {
+// CHECK-NEXT: = llvm.sitofp {{.*}} : i32 to f32
+  %0 = sitofp %arg0: i32 to f32
+// CHECK-NEXT: = llvm.sitofp {{.*}} : i32 to f64
+  %1 = sitofp %arg0: i32 to f64
+// CHECK-NEXT: = llvm.sitofp {{.*}} : i64 to f32
+  %2 = sitofp %arg1: i64 to f32
+// CHECK-NEXT: = llvm.sitofp {{.*}} : i64 to f64
+  %3 = sitofp %arg1: i64 to f64
+  return
+}
+
+// Checking conversion of integer vectors to floating point vector types.
+// CHECK-LABEL: @sitofp_vector
+func @sitofp_vector(%arg0 : vector<2xi16>, %arg1 : vector<2xi32>, %arg2 : vector<2xi64>) {
+// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi16> to vector<2xf32>
+  %0 = sitofp %arg0: vector<2xi16> to vector<2xf32>
+// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi16> to vector<2xf64>
+  %1 = sitofp %arg0: vector<2xi16> to vector<2xf64>
+// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi32> to vector<2xf32>
+  %2 = sitofp %arg1: vector<2xi32> to vector<2xf32>
+// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi32> to vector<2xf64>
+  %3 = sitofp %arg1: vector<2xi32> to vector<2xf64>
+// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi64> to vector<2xf32>
+  %4 = sitofp %arg2: vector<2xi64> to vector<2xf32>
+// CHECK-NEXT: = llvm.sitofp {{.*}} : vector<2xi64> to vector<2xf64>
+  %5 = sitofp %arg2: vector<2xi64> to vector<2xf64>
+  return
+}
+
+// Checking conversion of unsigned integer types to floating point.
+// CHECK-LABEL: @uitofp
+func @uitofp(%arg0 : i32, %arg1 : i64) {
+// CHECK-NEXT: = llvm.uitofp {{.*}} : i32 to f32
+  %0 = uitofp %arg0: i32 to f32
+// CHECK-NEXT: = llvm.uitofp {{.*}} : i32 to f64
+  %1 = uitofp %arg0: i32 to f64
+// CHECK-NEXT: = llvm.uitofp {{.*}} : i64 to f32
+  %2 = uitofp %arg1: i64 to f32
+// CHECK-NEXT: = llvm.uitofp {{.*}} : i64 to f64
+  %3 = uitofp %arg1: i64 to f64
+  return
 }
 
+// Checking conversion of floating point to wider floating point types.
+// CHECK-LABEL: @fpext
+func @fpext(%arg0 : f16, %arg1 : f32) {
+// CHECK-NEXT: = llvm.fpext {{.*}} : f16 to f32
+  %0 = fpext %arg0: f16 to f32
+// CHECK-NEXT: = llvm.fpext {{.*}} : f16 to f64
+  %1 = fpext %arg0: f16 to f64
+// CHECK-NEXT: = llvm.fpext {{.*}} : f32 to f64
+  %2 = fpext %arg1: f32 to f64
+  return
+}
+
+// Checking conversion of floating point vectors to wider floating point vector types.
+// CHECK-LABEL: @fpext_vector
+func @fpext_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>) {
+// CHECK-NEXT: = llvm.fpext {{.*}} : vector<2xf16> to vector<2xf32>
+  %0 = fpext %arg0: vector<2xf16> to vector<2xf32>
+// CHECK-NEXT: = llvm.fpext {{.*}} : vector<2xf16> to vector<2xf64>
+  %1 = fpext %arg0: vector<2xf16> to vector<2xf64>
+// CHECK-NEXT: = llvm.fpext {{.*}} : vector<2xf32> to vector<2xf64>
+  %2 = fpext %arg1: vector<2xf32> to vector<2xf64>
+  return
+}
+
+// Checking conversion of floating point to integer types.
+// CHECK-LABEL: @fptosi
+func @fptosi(%arg0 : f32, %arg1 : f64) {
+// CHECK-NEXT: = llvm.fptosi {{.*}} : f32 to i32
+  %0 = fptosi %arg0: f32 to i32
+// CHECK-NEXT: = llvm.fptosi {{.*}} : f32 to i64
+  %1 = fptosi %arg0: f32 to i64
+// CHECK-NEXT: = llvm.fptosi {{.*}} : f64 to i32
+  %2 = fptosi %arg1: f64 to i32
+// CHECK-NEXT: = llvm.fptosi {{.*}} : f64 to i64
+  %3 = fptosi %arg1: f64 to i64
+  return
+}
+
+// Checking conversion of floating point vectors to integer vector types.
+// CHECK-LABEL: @fptosi_vector
+func @fptosi_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>, %arg2 : vector<2xf64>) {
+// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf16> to vector<2xi32>
+  %0 = fptosi %arg0: vector<2xf16> to vector<2xi32>
+// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf16> to vector<2xi64>
+  %1 = fptosi %arg0: vector<2xf16> to vector<2xi64>
+// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf32> to vector<2xi32>
+  %2 = fptosi %arg1: vector<2xf32> to vector<2xi32>
+// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf32> to vector<2xi64>
+  %3 = fptosi %arg1: vector<2xf32> to vector<2xi64>
+// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf64> to vector<2xi32>
+  %4 = fptosi %arg2: vector<2xf64> to vector<2xi32>
+// CHECK-NEXT: = llvm.fptosi {{.*}} : vector<2xf64> to vector<2xi64>
+  %5 = fptosi %arg2: vector<2xf64> to vector<2xi64>
+  return
+}
+
+// Checking conversion of floating point to integer types.
+// CHECK-LABEL: @fptoui
+func @fptoui(%arg0 : f32, %arg1 : f64) {
+// CHECK-NEXT: = llvm.fptoui {{.*}} : f32 to i32
+  %0 = fptoui %arg0: f32 to i32
+// CHECK-NEXT: = llvm.fptoui {{.*}} : f32 to i64
+  %1 = fptoui %arg0: f32 to i64
+// CHECK-NEXT: = llvm.fptoui {{.*}} : f64 to i32
+  %2 = fptoui %arg1: f64 to i32
+// CHECK-NEXT: = llvm.fptoui {{.*}} : f64 to i64
+  %3 = fptoui %arg1: f64 to i64
+  return
+}
+
+// Checking conversion of floating point vectors to integer vector types.
+// CHECK-LABEL: @fptoui_vector
+func @fptoui_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>, %arg2 : vector<2xf64>) {
+// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf16> to vector<2xi32>
+  %0 = fptoui %arg0: vector<2xf16> to vector<2xi32>
+// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf16> to vector<2xi64>
+  %1 = fptoui %arg0: vector<2xf16> to vector<2xi64>
+// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf32> to vector<2xi32>
+  %2 = fptoui %arg1: vector<2xf32> to vector<2xi32>
+// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf32> to vector<2xi64>
+  %3 = fptoui %arg1: vector<2xf32> to vector<2xi64>
+// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf64> to vector<2xi32>
+  %4 = fptoui %arg2: vector<2xf64> to vector<2xi32>
+// CHECK-NEXT: = llvm.fptoui {{.*}} : vector<2xf64> to vector<2xi64>
+  %5 = fptoui %arg2: vector<2xf64> to vector<2xi64>
+  return
+}
+
+// Checking conversion of integer vectors to floating point vector types.
+// CHECK-LABEL: @uitofp_vector
+func @uitofp_vector(%arg0 : vector<2xi16>, %arg1 : vector<2xi32>, %arg2 : vector<2xi64>) {
+// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi16> to vector<2xf32>
+  %0 = uitofp %arg0: vector<2xi16> to vector<2xf32>
+// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi16> to vector<2xf64>
+  %1 = uitofp %arg0: vector<2xi16> to vector<2xf64>
+// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi32> to vector<2xf32>
+  %2 = uitofp %arg1: vector<2xi32> to vector<2xf32>
+// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi32> to vector<2xf64>
+  %3 = uitofp %arg1: vector<2xi32> to vector<2xf64>
+// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi64> to vector<2xf32>
+  %4 = uitofp %arg2: vector<2xi64> to vector<2xf32>
+// CHECK-NEXT: = llvm.uitofp {{.*}} : vector<2xi64> to vector<2xf64>
+  %5 = uitofp %arg2: vector<2xi64> to vector<2xf64>
+  return
+}
+
+// Checking conversion of floating point to narrower floating point types.
+// CHECK-LABEL: @fptrunc
+func @fptrunc(%arg0 : f32, %arg1 : f64) {
+// CHECK-NEXT: = llvm.fptrunc {{.*}} : f32 to f16
+  %0 = fptrunc %arg0: f32 to f16
+// CHECK-NEXT: = llvm.fptrunc {{.*}} : f64 to f16
+  %1 = fptrunc %arg1: f64 to f16
+// CHECK-NEXT: = llvm.fptrunc {{.*}} : f64 to f32
+  %2 = fptrunc %arg1: f64 to f32
+  return
+}
+
+// Checking conversion of floating point vectors to narrower floating point vector types.
+// CHECK-LABEL: @fptrunc_vector
+func @fptrunc_vector(%arg0 : vector<2xf32>, %arg1 : vector<2xf64>) {
+// CHECK-NEXT: = llvm.fptrunc {{.*}} : vector<2xf32> to vector<2xf16>
+  %0 = fptrunc %arg0: vector<2xf32> to vector<2xf16>
+// CHECK-NEXT: = llvm.fptrunc {{.*}} : vector<2xf64> to vector<2xf16>
+  %1 = fptrunc %arg1: vector<2xf64> to vector<2xf16>
+// CHECK-NEXT: = llvm.fptrunc {{.*}} : vector<2xf64> to vector<2xf32>
+  %2 = fptrunc %arg1: vector<2xf64> to vector<2xf32>
+  return
+}
+
+// Check sign and zero extension and truncation of integers.
+// CHECK-LABEL: @integer_extension_and_truncation
+func @integer_extension_and_truncation(%arg0 : i3) {
+// CHECK-NEXT: = llvm.sext %arg0 : i3 to i6
+  %0 = sexti %arg0 : i3 to i6
+// CHECK-NEXT: = llvm.zext %arg0 : i3 to i6
+  %1 = zexti %arg0 : i3 to i6
+// CHECK-NEXT: = llvm.trunc %arg0 : i3 to i2
+  %2 = trunci %arg0 : i3 to i2
+  return
+}
+
+// CHECK-LABEL: @dfs_block_order
+func @dfs_block_order(%arg0: i32) -> (i32) {
+// CHECK-NEXT:  %[[CST:.*]] = llvm.mlir.constant(42 : i32) : i32
+  %0 = constant 42 : i32
+// CHECK-NEXT:  llvm.br ^bb2
+  br ^bb2
+
+// CHECK-NEXT: ^bb1:
+// CHECK-NEXT:  %[[ADD:.*]] = llvm.add %arg0, %[[CST]] : i32
+// CHECK-NEXT:  llvm.return %[[ADD]] : i32
+^bb1:
+  %2 = addi %arg0, %0 : i32
+  return %2 : i32
+
+// CHECK-NEXT: ^bb2:
+^bb2:
+// CHECK-NEXT:  llvm.br ^bb1
+  br ^bb1
+}
+
+// CHECK-LABEL: func @fcmp(%arg0: f32, %arg1: f32) {
+func @fcmp(f32, f32) -> () {
+^bb0(%arg0: f32, %arg1: f32):
+  // CHECK:      llvm.fcmp "oeq" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "ogt" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "oge" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "olt" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "ole" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "one" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "ord" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "ueq" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "ugt" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "uge" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "ult" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "ule" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "une" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.fcmp "uno" %arg0, %arg1 : f32
+  // CHECK-NEXT: llvm.return
+  %1 = cmpf oeq, %arg0, %arg1 : f32
+  %2 = cmpf ogt, %arg0, %arg1 : f32
+  %3 = cmpf oge, %arg0, %arg1 : f32
+  %4 = cmpf olt, %arg0, %arg1 : f32
+  %5 = cmpf ole, %arg0, %arg1 : f32
+  %6 = cmpf one, %arg0, %arg1 : f32
+  %7 = cmpf ord, %arg0, %arg1 : f32
+  %8 = cmpf ueq, %arg0, %arg1 : f32
+  %9 = cmpf ugt, %arg0, %arg1 : f32
+  %10 = cmpf uge, %arg0, %arg1 : f32
+  %11 = cmpf ult, %arg0, %arg1 : f32
+  %12 = cmpf ule, %arg0, %arg1 : f32
+  %13 = cmpf une, %arg0, %arg1 : f32
+  %14 = cmpf uno, %arg0, %arg1 : f32
+
+  return
+}
+
+// CHECK-LABEL: @splat
+// CHECK-SAME: %[[A:arg[0-9]+]]: vector<4xf32>
+// CHECK-SAME: %[[ELT:arg[0-9]+]]: f32
+func @splat(%a: vector<4xf32>, %b: f32) -> vector<4xf32> {
+  %vb = splat %b : vector<4xf32>
+  %r = mulf %a, %vb : vector<4xf32>
+  return %r : vector<4xf32>
+}
+// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<4xf32>
+// CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32
+// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<4xf32>
+// CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0 : i32, 0 : i32, 0 : i32, 0 : i32]
+// CHECK-NEXT: %[[SCALE:[0-9]+]] = llvm.fmul %[[A]], %[[SPLAT]] : vector<4xf32>
+// CHECK-NEXT: llvm.return %[[SCALE]] : vector<4xf32>
+
+// -----
+
+// CHECK-LABEL: func @atomic_rmw
+func @atomic_rmw(%I : memref<10xi32>, %ival : i32, %F : memref<10xf32>, %fval : f32, %i : index) {
+  atomic_rmw assign %fval, %F[%i] : (f32, memref<10xf32>) -> f32
+  // CHECK: llvm.atomicrmw xchg %{{.*}}, %{{.*}} acq_rel
+  atomic_rmw addi %ival, %I[%i] : (i32, memref<10xi32>) -> i32
+  // CHECK: llvm.atomicrmw add %{{.*}}, %{{.*}} acq_rel
+  atomic_rmw maxs %ival, %I[%i] : (i32, memref<10xi32>) -> i32
+  // CHECK: llvm.atomicrmw max %{{.*}}, %{{.*}} acq_rel
+  atomic_rmw mins %ival, %I[%i] : (i32, memref<10xi32>) -> i32
+  // CHECK: llvm.atomicrmw min %{{.*}}, %{{.*}} acq_rel
+  atomic_rmw maxu %ival, %I[%i] : (i32, memref<10xi32>) -> i32
+  // CHECK: llvm.atomicrmw umax %{{.*}}, %{{.*}} acq_rel
+  atomic_rmw minu %ival, %I[%i] : (i32, memref<10xi32>) -> i32
+  // CHECK: llvm.atomicrmw umin %{{.*}}, %{{.*}} acq_rel
+  atomic_rmw addf %fval, %F[%i] : (f32, memref<10xf32>) -> f32
+  // CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} acq_rel
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @generic_atomic_rmw
+func @generic_atomic_rmw(%I : memref<10xi32>, %i : index) -> i32 {
+  %x = generic_atomic_rmw %I[%i] : memref<10xi32> {
+    ^bb0(%old_value : i32):
+      %c1 = constant 1 : i32
+      atomic_yield %c1 : i32
+  }
+  // CHECK: [[init:%.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
+  // CHECK-NEXT: llvm.br ^bb1([[init]] : i32)
+  // CHECK-NEXT: ^bb1([[loaded:%.*]]: i32):
+  // CHECK-NEXT: [[c1:%.*]] = llvm.mlir.constant(1 : i32)
+  // CHECK-NEXT: [[pair:%.*]] = llvm.cmpxchg %{{.*}}, [[loaded]], [[c1]]
+  // CHECK-SAME:                    acq_rel monotonic : i32
+  // CHECK-NEXT: [[new:%.*]] = llvm.extractvalue [[pair]][0]
+  // CHECK-NEXT: [[ok:%.*]] = llvm.extractvalue [[pair]][1]
+  // CHECK-NEXT: llvm.cond_br [[ok]], ^bb2, ^bb1([[new]] : i32)
+  // CHECK-NEXT: ^bb2:
+  %c2 = constant 2 : i32
+  %add = addi %c2, %x : i32
+  return %add : i32
+  // CHECK-NEXT: [[c2:%.*]] = llvm.mlir.constant(2 : i32)
+  // CHECK-NEXT: [[add:%.*]] = llvm.add [[c2]], [[new]] : i32
+  // CHECK-NEXT: llvm.return [[add]]
+}
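
generic_atomic_rmw lowers to a compare-and-swap retry loop: load the initial value, compute the region's result, and attempt llvm.cmpxchg, branching back to the loop header with the freshly observed value on failure. A condensed sketch of the loop structure the CHECK lines verify:

    ^retry(%old: i32):
      // region body computes %new from %old
      %pair = llvm.cmpxchg %ptr, %old, %new acq_rel monotonic : i32
      %cur = llvm.extractvalue %pair[0] : !llvm.struct<(i32, i1)>
      %ok = llvm.extractvalue %pair[1] : !llvm.struct<(i32, i1)>
      llvm.cond_br %ok, ^done, ^retry(%cur : i32)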
+
+// -----
+
+// CHECK-LABEL: func @rank_of_unranked
+// CHECK32-LABEL: func @rank_of_unranked
+func @rank_of_unranked(%unranked: memref<*xi32>) {
+  %rank = rank %unranked : memref<*xi32>
+  return
+}
+// CHECK-NEXT: llvm.mlir.undef
+// CHECK-NEXT: llvm.insertvalue
+// CHECK-NEXT: llvm.insertvalue
+// CHECK-NEXT: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(i64, ptr<i8>)>
+// CHECK32: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(i32, ptr<i8>)>
+
+// CHECK-LABEL: func @rank_of_ranked
+// CHECK32-LABEL: func @rank_of_ranked
+func @rank_of_ranked(%ranked: memref<?xi32>) {
+  %rank = rank %ranked : memref<?xi32>
+  return
+}
+// CHECK: llvm.mlir.constant(1 : index) : i64
+// CHECK32: llvm.mlir.constant(1 : index) : i32
+
+
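The contrast between the two tests above follows from the descriptor layouts: an unranked memref lowers to a two-field struct holding the runtime rank and a type-erased pointer to a ranked descriptor, so rank is a single llvm.extractvalue, while a ranked memref's rank is a compile-time constant. The unranked descriptor type at the two index widths:

    !llvm.struct<(i64, ptr<i8>)>   // default 64-bit index
    !llvm.struct<(i32, ptr<i8>)>   // with index-bitwidth=32
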
 // -----
 
 // CHECK-LABEL: func @log1p(
@@ -147,99 +997,6 @@ func @assert_test_function(%arg : i1) {
 
 // -----
 
-// CHECK-LABEL: func @transpose
-//       CHECK:   llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
-//       CHECK:   llvm.insertvalue {{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
-//       CHECK:    llvm.insertvalue {{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
-//       CHECK:    llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
-//       CHECK:   llvm.extractvalue {{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
-//       CHECK:    llvm.insertvalue {{.*}}[3, 2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
-//       CHECK:   llvm.extractvalue {{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
-//       CHECK:    llvm.insertvalue {{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
-//       CHECK:   llvm.extractvalue {{.*}}[3, 2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
-//       CHECK:    llvm.insertvalue {{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
-func @transpose(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
-  %0 = memref.transpose %arg0 (i, j, k) -> (k, i, j) : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]> to memref<?x?x?xf32, affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2 * s1 + s0 + d0 * s2 + d1)>>
-  return
-}
-
-// -----
-
-// CHECK: llvm.mlir.global external @gv0() : !llvm.array<2 x f32>
-memref.global @gv0 : memref<2xf32> = uninitialized
-
-// CHECK: llvm.mlir.global private @gv1() : !llvm.array<2 x f32>
-memref.global "private" @gv1 : memref<2xf32>
-
-// CHECK: llvm.mlir.global external @gv2(dense<{{\[\[}}0.000000e+00, 1.000000e+00, 2.000000e+00], [3.000000e+00, 4.000000e+00, 5.000000e+00]]> : tensor<2x3xf32>) : !llvm.array<2 x array<3 x f32>>
-memref.global @gv2 : memref<2x3xf32> = dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]>
-
-// Test 1D memref.
-// CHECK-LABEL: func @get_gv0_memref
-func @get_gv0_memref() {
-  %0 = memref.get_global @gv0 : memref<2xf32>
-  // CHECK: %[[DIM:.*]] = llvm.mlir.constant(2 : index) : i64
-  // CHECK: %[[STRIDE:.*]] = llvm.mlir.constant(1 : index) : i64
-  // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @gv0 : !llvm.ptr<array<2 x f32>>
-  // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ADDR]][%[[ZERO]], %[[ZERO]]] : (!llvm.ptr<array<2 x f32>>, i64, i64) -> !llvm.ptr<f32>
-  // CHECK: %[[DEADBEEF:.*]] = llvm.mlir.constant(3735928559 : index) : i64
-  // CHECK: %[[DEADBEEFPTR:.*]] = llvm.inttoptr %[[DEADBEEF]] : i64 to !llvm.ptr<f32>
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-  // CHECK: llvm.insertvalue %[[DEADBEEFPTR]], {{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-  // CHECK: llvm.insertvalue %[[GEP]], {{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-  // CHECK: %[[OFFSET:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK: llvm.insertvalue %[[OFFSET]], {{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-  // CHECK: llvm.insertvalue %[[DIM]], {{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-  // CHECK: llvm.insertvalue %[[STRIDE]], {{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>)>
-  return
-}
-
-// Test 2D memref.
-// CHECK-LABEL: func @get_gv2_memref
-func @get_gv2_memref() {
-  // CHECK: %[[DIM0:.*]] = llvm.mlir.constant(2 : index) : i64
-  // CHECK: %[[DIM1:.*]] = llvm.mlir.constant(3 : index) : i64
-  // CHECK: %[[STRIDE1:.*]] = llvm.mlir.constant(1 : index) : i64
-  // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @gv2 : !llvm.ptr<array<2 x array<3 x f32>>>
-  // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ADDR]][%[[ZERO]], %[[ZERO]], %[[ZERO]]] : (!llvm.ptr<array<2 x array<3 x f32>>>, i64, i64, i64) -> !llvm.ptr<f32>
-  // CHECK: %[[DEADBEEF:.*]] = llvm.mlir.constant(3735928559 : index) : i64
-  // CHECK: %[[DEADBEEFPTR:.*]] = llvm.inttoptr %[[DEADBEEF]] : i64 to !llvm.ptr<f32>
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[DEADBEEFPTR]], {{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[GEP]], {{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: %[[OFFSET:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK: llvm.insertvalue %[[OFFSET]], {{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[DIM0]], {{.*}}[3, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[DIM1]], {{.*}}[3, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[DIM1]], {{.*}}[4, 0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK: llvm.insertvalue %[[STRIDE1]], {{.*}}[4, 1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-
-  %0 = memref.get_global @gv2 : memref<2x3xf32>
-  return
-}
-
-// Test scalar memref.
-// CHECK: llvm.mlir.global external @gv3(1.000000e+00 : f32) : f32
-memref.global @gv3 : memref<f32> = dense<1.0>
-
-// CHECK-LABEL: func @get_gv3_memref
-func @get_gv3_memref() {
-  // CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @gv3 : !llvm.ptr<f32>
-  // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ADDR]][%[[ZERO]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-  // CHECK: %[[DEADBEEF:.*]] = llvm.mlir.constant(3735928559 : index) : i64
-  // CHECK: %[[DEADBEEFPTR:.*]] = llvm.inttoptr %[[DEADBEEF]] : i64 to !llvm.ptr<f32>
-  // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-  // CHECK: llvm.insertvalue %[[DEADBEEFPTR]], {{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-  // CHECK: llvm.insertvalue %[[GEP]], {{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-  // CHECK: %[[OFFSET:.*]] = llvm.mlir.constant(0 : index) : i64
-  // CHECK: llvm.insertvalue %[[OFFSET]], {{.*}}[2] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
-  %0 = memref.get_global @gv3 : memref<f32>
-  return
-}
-
 // This should not trigger an assertion by creating an LLVM::CallOp with a
 // nullptr result type.
 

diff  --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
index 772ae873c8e56..a2def3e507c11 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
@@ -8,6 +8,7 @@
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -std-expand                                              \
 // RUN:               -convert-vector-to-llvm                                  \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN: -e entry -entry-point-result=void -O3                                  \
@@ -20,6 +21,7 @@
 // RUN:               -convert-linalg-to-loops                                 \
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -convert-vector-to-llvm                                  \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN: -e entry -entry-point-result=void -O3                                  \
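
With memref lowering split into its own pass, every pipeline that previously relied on -convert-std-to-llvm to handle memref operations now needs -convert-memref-to-llvm first, which is the pattern repeated across the integration-test updates below. A minimal sketch of an updated invocation (input name hypothetical):

    mlir-opt input.mlir \
      -convert-scf-to-std \
      -convert-memref-to-llvm \
      -convert-std-to-llvm | \
    mlir-cpu-runner -e entry -entry-point-result=void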

diff  --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
index 56b090e1e7bf3..65aaad7ec2c4f 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
@@ -8,7 +8,8 @@
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -std-expand                                              \
 // RUN:               -convert-vector-to-llvm                                  \
-// RUN:               -convert-std-to-llvm -print-ir-after-all                                    \
+// RUN:               -convert-memref-to-llvm                                  \
+// RUN:               -convert-std-to-llvm -print-ir-after-all                 \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN: -e entry -entry-point-result=void -O3                                  \
 // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext  \
@@ -26,6 +27,7 @@
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -std-expand                                              \
 // RUN:               -convert-vector-to-llvm                                  \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN: -e entry -entry-point-result=void -O3                                  \
@@ -38,6 +40,7 @@
 // RUN:               -convert-linalg-to-loops                                 \
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -convert-vector-to-llvm                                  \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN: -e entry -entry-point-result=void -O3                                  \

diff  --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
index 6c2758c484f79..75e4e63814892 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
@@ -4,6 +4,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:  -e entry -entry-point-result=void -O0                                 \
@@ -16,6 +17,7 @@
 // RUN:               -async-runtime-policy-based-ref-counting                 \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:  -e entry -entry-point-result=void -O0                                 \
@@ -31,6 +33,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:  -e entry -entry-point-result=void -O0                                 \

diff  --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
index d8f99d061b7d4..04d5e0e6bab92 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir
@@ -4,6 +4,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:  -e entry -entry-point-result=void -O0                                 \
@@ -16,6 +17,7 @@
 // RUN:               -async-runtime-policy-based-ref-counting                 \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:  -e entry -entry-point-result=void -O0                                 \
@@ -31,6 +33,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-scf-to-std                                      \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:  -e entry -entry-point-result=void -O0                                 \

diff  --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
index 1e8dfa25e8cc0..1700910968cc7 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
@@ -5,7 +5,7 @@
 // RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \
 
 // RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // Activate to dump assembly
 // R_UN:   -dump-object-file -object-filename=/tmp/a.o \

diff  --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
index b54d185edeb98..897f360b8153f 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_column_major_as_row_major.mlir
@@ -8,7 +8,7 @@
 // R_UN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,16 vectorize" | \
 
 // RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // Activate to dump assembly
 // R_UN:   -dump-object-file -object-filename=/tmp/a.o \

diff  --git a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir
index 0f19a6ab0c26a..61241f49503a4 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul_i8_i8_i32.mlir
@@ -5,7 +5,7 @@
 // RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \
 // RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
 
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm -mlir-disable-threading | \
+// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -mlir-disable-threading | \
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // Activate to dump assembly
 // R_UN:   -dump-object-file -object-filename=/tmp/a.o \

diff  --git a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
index 06d29cdec093b..6bf54dbf1ca27 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir
index 568acfad529e0..3c086a017bbeb 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/rank-reducing-subview.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff  --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
index b0c2baafd94a1..9f958f9aacd86 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -canonicalize -cse -linalg-comprehensive-module-bufferize |\
 // RUN: mlir-opt -convert-vector-to-scf -lower-affine -convert-linalg-to-loops |\
-// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext |\

diff  --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
index 19b0baf3e9fdb..29a5141929315 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=4" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=4" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir
index 12fe3b96549f3..7e4b27679b5ce 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir
index f1aaed1f5d27b..71d0c8a56e364 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir
index cdfad2e4fe6de..3a85d500460f7 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-ncw-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir
index 7f685b64722a6..68f890c9dabb2 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
index 48827b5a41468..d1f8de5c9d888 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,3,3"  -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,3,3"  -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,3,3"  -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,3,3"  -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir
index 7615073410f70..32e548cbb3240 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3"  -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3"  -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir
index c47d6aa6fe103..50b9861c9dc13 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir
index 024e85bc843ad..3d40083037937 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nchw-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3"  -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3"  -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir
index 2896a7517871e..51326560b59e1 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
index b47cf154323d9..0cba12f7895f6 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2,2" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2,2" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir
index 1eeb46491deaf..5f063543a584e 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir
index 0619a1b9ac9cb..490bb62c14dac 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir
index 4f3bc9b77736f..cb7b49eec4d22 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ncdhw-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir
index a47f1716ff073..f761088b22811 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-call.mlir
@@ -1,21 +1,21 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" -convert-linalg-to-loops -convert-scf-to-std \
-// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN:   -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
-// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" \
-// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir
index f18b265064472..1feb8d19eab7c 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-elementwise-to-linalg -std-bufferize -tensor-constant-bufferize -linalg-bufferize -tensor-bufferize -func-bufferize -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-elementwise-to-linalg -std-bufferize -tensor-constant-bufferize -linalg-bufferize -tensor-bufferize -func-bufferize -convert-linalg-to-loops -convert-linalg-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
index 162f5282f8e3f..7c595b5e21d73 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -linalg-bufferize -std-bufferize \
 // RUN: -tensor-constant-bufferize -tensor-bufferize -func-bufferize \
 // RUN: -finalizing-bufferize \
-// RUN: -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
index 0137081776dc9..42d0a3ab28a04 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -linalg-bufferize -std-bufferize \
 // RUN: -tensor-constant-bufferize -tensor-bufferize -func-bufferize \
 // RUN: -finalizing-bufferize \
-// RUN: -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
index 0fd30cc5bb5a5..e0dc655842b85 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -linalg-bufferize -std-bufferize \
 // RUN: -tensor-constant-bufferize -tensor-bufferize -func-bufferize \
 // RUN: -finalizing-bufferize \
-// RUN: -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
index 71e27733c83f6..1f4134e125124 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -tensor-constant-bufferize -std-bufferize -linalg-bufferize \
 // RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -convert-linalg-to-loops \
-// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: -convert-linalg-to-llvm --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
index f7b12d6085dc0..7f7751f0c4d96 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -linalg-bufferize -std-bufferize -tensor-constant-bufferize \
 // RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize  -convert-linalg-to-loops -convert-scf-to-std \
-// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
@@ -9,7 +9,7 @@
 // RUN: -scf-bufferize -std-bufferize -tensor-constant-bufferize -tensor-bufferize \
 // RUN: -func-bufferize \
 // RUN: -finalizing-bufferize -convert-linalg-to-loops -convert-scf-to-std -convert-scf-to-std \
-// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
+// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std --convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
index 9a457c7b0a279..644553fde3074 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
@@ -3,7 +3,7 @@
 // RUN:   --convert-linalg-to-loops --convert-vector-to-scf --convert-scf-to-std \
 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
 // RUN:   --std-bufferize --finalizing-bufferize  \
-// RUN:   --convert-vector-to-llvm --convert-std-to-llvm | \
+// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \
 // RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \
 // RUN: TENSOR1="%mlir_integration_test_dir/data/zero.mtx" \
 // RUN: mlir-cpu-runner \

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
index 2b52170aa8b32..db2b5b7cca020 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
@@ -3,7 +3,7 @@
 // RUN:   --convert-linalg-to-loops --convert-vector-to-scf --convert-scf-to-std \
 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
 // RUN:   --std-bufferize --finalizing-bufferize  \
-// RUN:   --convert-vector-to-llvm --convert-std-to-llvm | \
+// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \
 // RUN: TENSOR0="%mlir_integration_test_dir/data/test.tns" \
 // RUN: mlir-cpu-runner \
 // RUN:  -e entry -entry-point-result=void  \

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
index 712b6c533026b..039ca3c900b53 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
@@ -3,7 +3,7 @@
 // RUN:   --convert-linalg-to-loops --convert-vector-to-scf --convert-scf-to-std \
 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
 // RUN:   --std-bufferize --finalizing-bufferize  \
-// RUN:   --convert-vector-to-llvm --convert-std-to-llvm | \
+// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \
 // RUN: TENSOR0="%mlir_integration_test_dir/data/wide.mtx" \
 // RUN: mlir-cpu-runner \
 // RUN:  -e entry -entry-point-result=void  \

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
index 69e8fb454485f..538d169ba8f44 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
@@ -3,7 +3,7 @@
 // RUN:   --convert-linalg-to-loops --convert-vector-to-scf --convert-scf-to-std \
 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
 // RUN:   --std-bufferize --finalizing-bufferize  \
-// RUN:   --convert-vector-to-llvm --convert-std-to-llvm | \
+// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \
 // RUN: TENSOR0="%mlir_integration_test_dir/data/mttkrp_b.tns" \
 // RUN: mlir-cpu-runner \
 // RUN:  -e entry -entry-point-result=void  \

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir
index fc196f2999ce5..29fedafc6b303 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir
@@ -3,7 +3,7 @@
 // RUN:   --convert-linalg-to-loops --convert-vector-to-scf --convert-scf-to-std \
 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
 // RUN:   --std-bufferize --finalizing-bufferize  \
-// RUN:   --convert-vector-to-llvm --convert-std-to-llvm | \
+// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \
 // RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \
 // RUN: mlir-cpu-runner \
 // RUN:  -e entry -entry-point-result=void  \

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
index ae22e2896d701..1ed0b0b1a616d 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
@@ -3,7 +3,7 @@
 // RUN:   --convert-linalg-to-loops --convert-vector-to-scf --convert-scf-to-std \
 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
 // RUN:   --std-bufferize --finalizing-bufferize  \
-// RUN:   --convert-vector-to-llvm --convert-std-to-llvm | \
+// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \
 // RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \
 // RUN: mlir-cpu-runner \
 // RUN:  -e entry -entry-point-result=void  \

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
index cadf7c4ce8e30..35d875d6c1abe 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
@@ -3,7 +3,7 @@
 // RUN:   --convert-linalg-to-loops --convert-vector-to-scf --convert-scf-to-std \
 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
 // RUN:   --std-bufferize --finalizing-bufferize  \
-// RUN:   --convert-vector-to-llvm --convert-std-to-llvm | \
+// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \
 // RUN: TENSOR0="%mlir_integration_test_dir/data/wide.mtx" \
 // RUN: mlir-cpu-runner \
 // RUN:  -e entry -entry-point-result=void  \

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
index aad77c7de4d7c..96bddefa56f89 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
@@ -3,7 +3,7 @@
 // RUN:   --convert-linalg-to-loops --convert-vector-to-scf --convert-scf-to-std \
 // RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
 // RUN:   --std-bufferize --finalizing-bufferize  \
-// RUN:   --convert-vector-to-llvm --convert-std-to-llvm | \
+// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm | \
 // RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \
 // RUN: mlir-cpu-runner \
 // RUN:  -e entry -entry-point-result=void  \

diff --git a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
index 9d625f3daead9..3c463081b4b1c 100644
--- a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
+++ b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -std-expand -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -std-expand -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir b/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir
index dace65911db8f..e2a51e9a990f2 100644
--- a/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir
+++ b/mlir/test/Integration/Dialect/Standard/CPU/test_subview.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | FileCheck %s
 

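The test_subview hunk above is the smallest illustration of the split: a test that previously lowered everything with -convert-std-to-llvm alone must now schedule -convert-memref-to-llvm first, since the memref-dialect patterns have moved out of the std-to-llvm pass. (Some hunks spell the new flag --convert-memref-to-llvm and others -convert-memref-to-llvm; mlir-opt accepts single and double dashes interchangeably, so the resulting pipelines are identical.) A reduced sketch of a test with that shape, using the dialect syntax current at the time of this commit; the function body, constants, and CHECK lines are illustrative and not part of the patch:

  // RUN: mlir-opt %s -convert-memref-to-llvm -convert-std-to-llvm | \
  // RUN: mlir-cpu-runner -e main -entry-point-result=void \
  // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | FileCheck %s

  // Provided by libmlir_runner_utils.
  func private @print_memref_f32(memref<*xf32>)

  func @main() {
    %c0 = constant 0 : index
    %f1 = constant 1.0 : f32
    // memref.alloc/store/cast/dealloc are converted by -convert-memref-to-llvm ...
    %m = memref.alloc() : memref<1xf32>
    memref.store %f1, %m[%c0] : memref<1xf32>
    %u = memref.cast %m : memref<1xf32> to memref<*xf32>
    // ... while constant, call, and return are converted by -convert-std-to-llvm.
    call @print_memref_f32(%u) : (memref<*xf32>) -> ()
    // CHECK: data =
    // CHECK-NEXT: [1]
    memref.dealloc %m : memref<1xf32>
    return
  }

Dropping -convert-memref-to-llvm from that RUN line now leaves the memref ops unconverted, and -convert-std-to-llvm fails on them because they are illegal for its full conversion to the LLVM dialect.
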
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
index 6178d3e9a6ae6..38132bfbdaaad 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-compress.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
index 3a66d7f8dcc74..66dc596b54042 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-expand.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
index 9273d880207bb..0e30a0e3d0be1 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-gather.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
index da4dd5a458814..c7b15c5b4c37f 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedload.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
index ce895c0070c52..c2ff669dc7e03 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-maskedstore.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
index abe67da8bd340..5e96520f152ea 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-scatter.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
index f4aee26273511..c96955763ae99 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-dot-matvec.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
index dc3cdc0c3026b..bdbbc7514cf50 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-sparse-saxpy-jagged-matvec.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
index 7e1596ae31619..9186c3c85f937 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir
@@ -1,19 +1,19 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
index e3154e60b3290..60f1ee48777f9 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir
@@ -1,19 +1,19 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
index 344167bae4677..ab44d3869ca56 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir
@@ -1,19 +1,19 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf='lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf='full-unroll=true lower-permutation-maps=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
index 98d8132ec5ca5..5d3b60c7147fa 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
index 5fdaeafe54482..4b8062a471678 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
-// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext,%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir
index 76ac355066687..c165a71d196d3 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-write.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
index a43cf3c4bfa6c..c3f015e222a54 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-vector-distribute.mlir
@@ -1,11 +1,11 @@
 // RUN: mlir-opt %s -test-vector-to-forloop -convert-vector-to-scf \
-// RUN:   -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN:   -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void  \
 // RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
 // RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \
-// RUN: -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e main \
+// RUN: -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e main \
 // RUN: -entry-point-result=void \
 // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext | \
 // RUN: FileCheck %s

diff --git a/mlir/test/lib/Conversion/StandardToLLVM/CMakeLists.txt b/mlir/test/lib/Conversion/StandardToLLVM/CMakeLists.txt
index d5bedb0120cca..576775ed43b85 100644
--- a/mlir/test/lib/Conversion/StandardToLLVM/CMakeLists.txt
+++ b/mlir/test/lib/Conversion/StandardToLLVM/CMakeLists.txt
@@ -5,6 +5,7 @@ add_mlir_library(MLIRTestStandardToLLVM
   EXCLUDE_FROM_LIBMLIR
 
   LINK_LIBS PUBLIC
+  MLIRLLVMCommonConversion
   MLIRLLVMIR
   MLIRLLVMIRTransforms
   MLIRPass

diff --git a/mlir/test/lib/Conversion/StandardToLLVM/TestConvertCallOp.cpp b/mlir/test/lib/Conversion/StandardToLLVM/TestConvertCallOp.cpp
index 532d5ea17c034..873a9a8cebb74 100644
--- a/mlir/test/lib/Conversion/StandardToLLVM/TestConvertCallOp.cpp
+++ b/mlir/test/lib/Conversion/StandardToLLVM/TestConvertCallOp.cpp
@@ -8,8 +8,8 @@
 
 #include "TestDialect.h"
 #include "TestTypes.h"
+#include "mlir/Conversion/LLVMCommon/Pattern.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Pass/Pass.h"

diff --git a/mlir/test/mlir-cpu-runner/async-value.mlir b/mlir/test/mlir-cpu-runner/async-value.mlir
index 878256ea3ad47..2af7e14f6a1f4 100644
--- a/mlir/test/mlir-cpu-runner/async-value.mlir
+++ b/mlir/test/mlir-cpu-runner/async-value.mlir
@@ -3,6 +3,7 @@
 // RUN:               -async-runtime-ref-counting-opt                          \
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-vector-to-llvm                                  \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:     -e main -entry-point-result=void -O0                               \

diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir
index 0c37968d26acb..5578c3c5527cd 100644
--- a/mlir/test/mlir-cpu-runner/async.mlir
+++ b/mlir/test/mlir-cpu-runner/async.mlir
@@ -5,6 +5,7 @@
 // RUN:               -convert-linalg-to-loops                                 \
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -convert-linalg-to-llvm                                  \
+// RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN: | mlir-cpu-runner                                                      \
 // RUN:     -e main -entry-point-result=void -O0                               \

diff --git a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir
index dd644ddbe3220..555bf0322de10 100644
--- a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir
+++ b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' | mlir-cpu-runner -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -convert-scf-to-std -convert-memref-to-llvm -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' | mlir-cpu-runner -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s
 
 // Verify bare pointer memref calling convention. `simple_add1_add2_test`
 // gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second

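For downstream users who build pipelines in C++ rather than through RUN lines,
the equivalent update is to schedule the new pass right before the std-to-llvm
one. A minimal sketch using the pass-creation functions this commit touches;
the useBarePtrCallConv field is assumed here to be the programmatic
counterpart of the use-bare-ptr-memref-call-conv=1 flag above:

#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"

// Sketch: lower memref ops to LLVM first, then the remaining std ops,
// keeping the bare-pointer memref calling convention on the std pass.
static mlir::LogicalResult lowerWithBarePtr(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  pm.addPass(mlir::createMemRefToLLVMPass());
  mlir::LowerToLLVMOptions options(module.getContext());
  options.useBarePtrCallConv = true; // assumed field, mirroring the RUN flag
  pm.addPass(mlir::createLowerToLLVMPass(options));
  return pm.run(module);
}

Running memref-to-llvm first leaves only the remaining std ops for the final
conversion, which is the ordering the runner tools below adopt as well.
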
diff --git a/mlir/test/mlir-cpu-runner/copy.mlir b/mlir/test/mlir-cpu-runner/copy.mlir
index 508e7b264ae1c..f1fe1f07264cb 100644
--- a/mlir/test/mlir-cpu-runner/copy.mlir
+++ b/mlir/test/mlir-cpu-runner/copy.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-std-to-llvm \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-memref-to-llvm -convert-std-to-llvm \
 // RUN: | mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/mlir-cpu-runner/global_memref.mlir b/mlir/test/mlir-cpu-runner/global_memref.mlir
index 1d2a2e02dfb26..ddbb7ecfb2dbe 100644
--- a/mlir/test/mlir-cpu-runner/global_memref.mlir
+++ b/mlir/test/mlir-cpu-runner/global_memref.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-std-to-llvm | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
+// RUN: mlir-opt %s -convert-memref-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
 
 func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
 func private @print_memref_i32(memref<*xi32>) attributes { llvm.emit_c_interface }

diff --git a/mlir/test/mlir-cpu-runner/memref_reinterpret_cast.mlir b/mlir/test/mlir-cpu-runner/memref_reinterpret_cast.mlir
index b7442d5f57898..0ab06896b5b10 100644
--- a/mlir/test/mlir-cpu-runner/memref_reinterpret_cast.mlir
+++ b/mlir/test/mlir-cpu-runner/memref_reinterpret_cast.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -convert-std-to-llvm \
+// RUN: mlir-opt %s -convert-scf-to-std -convert-memref-to-llvm -convert-std-to-llvm \
 // RUN: | mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/mlir-cpu-runner/memref_reshape.mlir b/mlir/test/mlir-cpu-runner/memref_reshape.mlir
index f6d01d9e6af91..a6323128f9fec 100644
--- a/mlir/test/mlir-cpu-runner/memref_reshape.mlir
+++ b/mlir/test/mlir-cpu-runner/memref_reshape.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -std-expand -convert-std-to-llvm \
+// RUN: mlir-opt %s -convert-scf-to-std -std-expand -convert-memref-to-llvm -convert-std-to-llvm \
 // RUN: | mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s

diff --git a/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir
index 63cc9ffc96b23..02c63acf54b0c 100644
--- a/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir
+++ b/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -convert-linalg-to-loops -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
+// RUN: mlir-opt -convert-linalg-to-loops -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
 
 func @main() {
   %A = memref.alloc() : memref<16x16xf32>

diff --git a/mlir/test/mlir-cpu-runner/unranked_memref.mlir b/mlir/test/mlir-cpu-runner/unranked_memref.mlir
index 5a9e1da574336..c830f3bba6b2c 100644
--- a/mlir/test/mlir-cpu-runner/unranked_memref.mlir
+++ b/mlir/test/mlir-cpu-runner/unranked_memref.mlir
@@ -1,6 +1,7 @@
 // RUN: mlir-opt %s -convert-linalg-to-loops \
 // RUN:             -convert-scf-to-std      \
 // RUN:             -convert-linalg-to-llvm  \
+// RUN:             -convert-memref-to-llvm  \
 // RUN:             -convert-std-to-llvm |   \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s

diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir
index 29adf7b9a84c4..8edb780cf7989 100644
--- a/mlir/test/mlir-cpu-runner/utils.mlir
+++ b/mlir/test/mlir-cpu-runner/utils.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e print_1d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e print_1d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -convert-memref-to-llvm -convert-std-to-llvm | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D
 
 func @print_0d() {
   %f = constant 2.00000e+00 : f32

diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py
index 040cc40cb61b1..e8a6fe1cd2a5f 100644
--- a/mlir/test/python/execution_engine.py
+++ b/mlir/test/python/execution_engine.py
@@ -56,7 +56,7 @@ def testInvalidModule():
 
 def lowerToLLVM(module):
   import mlir.conversions
-  pm = PassManager.parse("convert-std-to-llvm")
+  pm = PassManager.parse("convert-memref-to-llvm,convert-std-to-llvm")
   pm.run(module)
   return module
 

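The same two-pass pipeline can also be assembled from its textual form on the
C++ side; a sketch assuming the parsePassPipeline overload from
mlir/Pass/PassRegistry.h that populates an existing pass manager:

#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassRegistry.h"

// Sketch: parse the pipeline textually, as the Python tests in this commit
// now do via PassManager.parse.
static mlir::LogicalResult lowerFromText(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  if (mlir::failed(mlir::parsePassPipeline(
          "convert-memref-to-llvm,convert-std-to-llvm", pm)))
    return mlir::failure();
  return pm.run(module);
}
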
diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py
index 8ec4b6c44da20..b31a979e95df3 100644
--- a/mlir/test/python/integration/dialects/linalg/opsrun.py
+++ b/mlir/test/python/integration/dialects/linalg/opsrun.py
@@ -127,7 +127,7 @@ def transform(module, boilerplate):
       boilerplate)
   pm = PassManager.parse("func(convert-linalg-to-loops, lower-affine, " +
                          "convert-scf-to-std), convert-vector-to-llvm," +
-                         "convert-std-to-llvm")
+                         "convert-memref-to-llvm,convert-std-to-llvm")
   pm.run(mod)
   return mod
 

diff --git a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt
index 888e6b2c6cd38..c65e95270dcc4 100644
--- a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt
+++ b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt
@@ -64,6 +64,7 @@ if (MLIR_ENABLE_VULKAN_RUNNER)
     MLIRLLVMCommonConversion
     MLIRLLVMToLLVMIRTranslation
     MLIRMemRef
+    MLIRMemRefToLLVM
     MLIRParser
     MLIRSPIRV
     MLIRSPIRVTransforms

diff --git a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
index 9cac0ee3440b5..f203a5d6f466e 100644
--- a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
+++ b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h"
 #include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h"
 #include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Conversion/StandardToSPIRV/StandardToSPIRVPass.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
@@ -50,6 +51,7 @@ static LogicalResult runMLIRPasses(ModuleOp module) {
   passManager.addPass(createConvertGpuLaunchFuncToVulkanLaunchFuncPass());
   LowerToLLVMOptions llvmOptions(module.getContext(), DataLayout(module));
   llvmOptions.emitCWrappers = true;
+  passManager.addPass(createMemRefToLLVMPass());
   passManager.addPass(createLowerToLLVMPass(llvmOptions));
   passManager.addPass(createConvertVulkanLaunchFuncToVulkanCallsPass());
   return passManager.run(module);

diff --git a/mlir/unittests/ExecutionEngine/CMakeLists.txt b/mlir/unittests/ExecutionEngine/CMakeLists.txt
index 14a2ffe0e242b..b2615f7353404 100644
--- a/mlir/unittests/ExecutionEngine/CMakeLists.txt
+++ b/mlir/unittests/ExecutionEngine/CMakeLists.txt
@@ -7,6 +7,7 @@ target_link_libraries(MLIRExecutionEngineTests
   PRIVATE
   MLIRExecutionEngine
   MLIRLinalgToLLVM
+  MLIRMemRefToLLVM
   ${dialect_libs}
 
 )

diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp
index 853451a13d2b5..017f47d747731 100644
--- a/mlir/unittests/ExecutionEngine/Invoke.cpp
+++ b/mlir/unittests/ExecutionEngine/Invoke.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
@@ -39,6 +40,7 @@ static struct LLVMInitializer {
 /// dialects lowering to LLVM Dialect.
 static LogicalResult lowerToLLVMDialect(ModuleOp module) {
   PassManager pm(module.getContext());
+  pm.addPass(mlir::createMemRefToLLVMPass());
   pm.addPass(mlir::createLowerToLLVMPass());
   return pm.run(module);
 }
