[llvm] f88f8fd - [mlir] Add a generic mem2reg implementation.

Tobias Gysi via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 26 23:12:41 PDT 2023


Author: Théo Degioanni
Date: 2023-04-27T06:00:48Z
New Revision: f88f8fd0bca34dc8e5571cb828b1a159a50fd504

URL: https://github.com/llvm/llvm-project/commit/f88f8fd0bca34dc8e5571cb828b1a159a50fd504
DIFF: https://github.com/llvm/llvm-project/commit/f88f8fd0bca34dc8e5571cb828b1a159a50fd504.diff

LOG: [mlir] Add a generic mem2reg implementation.

This patch introduces a generic implementation of mem2reg on
unstructured control flow, along with a specialization for LLVM IR. This
is achieved by defining three new interfaces, representing 1. allocating
operations, 2. operations performing memory accesses, 3. operations that
can be rewired and/or deleted so they no longer rely on a specific use.

The file containing the core implementation of the algorithm
(`Mem2Reg.cpp`) contains a detailed explanation of how the algorithm
works. The contract for this pass is that given a memory slot with a
single non-aliased pointer, the pass will either remove all the uses of
the pointer or not change anything.

To help review this patch, I recommend starting by looking at the
interfaces defined in `Mem2Reg.td`, along with their reference
implementation for LLVM IR defined in `LLVMMem2Reg.cpp`. Then, the core
algorithm is implemented in `Mem2Reg.cpp`.

If this is all good, I also have an implementation of the interfaces for
0-dimensional memref promotion that I can upstream afterwards.

Reviewed By: gysit

Differential Revision: https://reviews.llvm.org/D148109

Added: 
    mlir/include/mlir/Interfaces/Mem2RegInterfaces.h
    mlir/include/mlir/Interfaces/Mem2RegInterfaces.td
    mlir/include/mlir/Transforms/Mem2Reg.h
    mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp
    mlir/lib/Interfaces/Mem2RegInterfaces.cpp
    mlir/lib/Transforms/Mem2Reg.cpp
    mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir
    mlir/test/Transforms/mem2reg-llvmir.mlir

Modified: 
    llvm/include/llvm/ADT/DenseMap.h
    mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
    mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
    mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
    mlir/include/mlir/Interfaces/CMakeLists.txt
    mlir/include/mlir/Transforms/Passes.h
    mlir/include/mlir/Transforms/Passes.td
    mlir/lib/Dialect/LLVMIR/CMakeLists.txt
    mlir/lib/Interfaces/CMakeLists.txt
    mlir/lib/Transforms/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index 8a680c031d302..3ef6a7cd1b4b5 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -312,6 +312,20 @@ class DenseMapBase : public DebugEpochBase {
       insert(*I);
   }
 
+  /// Returns the value associated with the key in the map if it exists. If it
+  /// does not exist, emplaces a default value for the key and returns a
+  /// reference to the newly created value.
+  ValueT &getOrInsertDefault(KeyT &&Key) {
+    return try_emplace(Key).first->second;
+  }
+
+  /// Returns the value associated with the key in the map if it exists. If it
+  /// does not exist, emplaces a default value for the key and returns a
+  /// reference to the newly created value.
+  ValueT &getOrInsertDefault(const KeyT &Key) {
+    return try_emplace(Key).first->second;
+  }
+
   bool erase(const KeyT &Val) {
     BucketT *TheBucket;
     if (!LookupBucketFor(Val, TheBucket))

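The new `getOrInsertDefault` helper is what the mem2reg driver below uses to
build its use lists lazily. A minimal standalone sketch of its behavior (the
key and value types here are arbitrary, not taken from the patch):

    #include <cassert>
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/SmallVector.h"

    int main() {
      llvm::DenseMap<int, llvm::SmallVector<int>> map;
      // The first access default-constructs an empty vector for the key and
      // returns a reference to it.
      map.getOrInsertDefault(42).push_back(1);
      // Later accesses find the existing entry instead of overwriting it.
      map.getOrInsertDefault(42).push_back(2);
      assert(map[42].size() == 2);
      return 0;
    }
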
diff  --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
index 9822c092ea473..728f95291a699 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
@@ -29,6 +29,7 @@
 #include "mlir/Interfaces/InferTypeOpInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "mlir/Support/ThreadLocalCache.h"
+#include "mlir/Transforms/Mem2Reg.h"
 #include "llvm/ADT/PointerEmbeddedInt.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/LLVMContext.h"

diff  --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index 6e35a3a9d2627..4b30d0c164c81 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -6,6 +6,7 @@ include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
 include "mlir/Dialect/LLVMIR/LLVMEnums.td"
 include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
+include "mlir/Interfaces/Mem2RegInterfaces.td"
 
 // Operations that correspond to LLVM intrinsics. With MLIR operation set being
 // extendable, there is no reason to introduce a hard boundary between "core"
@@ -214,7 +215,8 @@ def LLVM_NoAliasScopeDeclOp
 
 /// Base operation for lifetime markers. The LLVM intrinsics require the size
 /// operand to be an immediate. In MLIR it is encoded as an attribute.
-class LLVM_LifetimeBaseOp<string opName> : LLVM_ZeroResultIntrOp<opName> {
+class LLVM_LifetimeBaseOp<string opName> : LLVM_ZeroResultIntrOp<opName, [],
+    [DeclareOpInterfaceMethods<PromotableOpInterface>]> {
   let arguments = (ins I64Attr:$size, LLVM_AnyPointer:$ptr);
 
   // Custom builder to convert the size attribute to an integer.
@@ -322,7 +324,8 @@ def LLVM_CoroResumeOp : LLVM_IntrOp<"coro.resume", [], [], [], 0> {
 // Debug function intrinsics.
 //
 
-class LLVM_DbgIntrOp<string name, string argName> : LLVM_IntrOp<name, [], [], [], 0> {
+class LLVM_DbgIntrOp<string name, string argName, list<Trait> traits = []>
+    : LLVM_IntrOp<name, [], [], traits, 0> {
   let llvmBuilder = [{
     llvm::Module *module = builder.GetInsertBlock()->getModule();
     llvm::LLVMContext &ctx = module->getContext();
@@ -363,7 +366,8 @@ class LLVM_DbgIntrOp<string name, string argName> : LLVM_IntrOp<name, [], [], []
   }];
 }
 
-def LLVM_DbgDeclareOp : LLVM_DbgIntrOp<"dbg.declare", "addr"> {
+def LLVM_DbgDeclareOp : LLVM_DbgIntrOp<"dbg.declare", "addr",
+    [DeclareOpInterfaceMethods<PromotableOpInterface>]> {
   let summary = "Declare the address of a local debug info variable.";
   let arguments = (ins LLVM_AnyPointer:$addr, LLVM_DILocalVariableAttr:$varInfo);
 }

diff  --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 29b41ff122d97..f9025e01c1f69 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -22,6 +22,7 @@ include "mlir/IR/SymbolInterfaces.td"
 include "mlir/Interfaces/CallInterfaces.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
+include "mlir/Interfaces/Mem2RegInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 
 class LLVM_Builder<string builder> {
@@ -171,7 +172,9 @@ def LLVM_FNegOp : LLVM_UnaryFloatArithmeticOp<
   LLVM_ScalarOrVectorOf<LLVM_AnyFloat>, "fneg", "FNeg">;
 
 // Memory-related operations.
-def LLVM_AllocaOp : LLVM_Op<"alloca">, LLVM_MemOpPatterns {
+def LLVM_AllocaOp : LLVM_Op<"alloca", 
+    [DeclareOpInterfaceMethods<PromotableAllocationOpInterface>]>,
+  LLVM_MemOpPatterns {
   let arguments = (ins AnyInteger:$arraySize,
                    OptionalAttr<I64Attr>:$alignment,
                    OptionalAttr<TypeAttr>:$elem_type,
@@ -228,7 +231,8 @@ def LLVM_AllocaOp : LLVM_Op<"alloca">, LLVM_MemOpPatterns {
   let hasVerifier = 1;
 }
 
-def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure]> {
+def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure,
+    DeclareOpInterfaceMethods<PromotableOpInterface>]> {
   let arguments = (ins LLVM_ScalarOrVectorOf<LLVM_AnyPointer>:$base,
                    Variadic<LLVM_ScalarOrVectorOf<AnyInteger>>:$dynamicIndices,
                    DenseI32ArrayAttr:$rawConstantIndices,
@@ -311,7 +315,8 @@ def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure]> {
   let hasVerifier = 1;
 }
 
-def LLVM_LoadOp : LLVM_MemAccessOpBase<"load"> {
+def LLVM_LoadOp : LLVM_MemAccessOpBase<"load",
+    [DeclareOpInterfaceMethods<PromotableMemOpInterface>]> {
   dag args = (ins Arg<LLVM_PointerTo<LLVM_LoadableType>, "", [MemRead]>:$addr,
               OptionalAttr<I64Attr>:$alignment,
               UnitAttr:$volatile_,
@@ -382,7 +387,8 @@ def LLVM_LoadOp : LLVM_MemAccessOpBase<"load"> {
   let hasVerifier = 1;
 }
 
-def LLVM_StoreOp : LLVM_MemAccessOpBase<"store"> {
+def LLVM_StoreOp : LLVM_MemAccessOpBase<"store",
+    [DeclareOpInterfaceMethods<PromotableMemOpInterface>]> {
   dag args = (ins LLVM_LoadableType:$value,
               Arg<LLVM_PointerTo<LLVM_LoadableType>,"",[MemWrite]>:$addr,
               OptionalAttr<I64Attr>:$alignment,
@@ -465,14 +471,15 @@ class LLVM_CastOp<string mnemonic, string instName, Type type,
       $_location, $_resultType, $arg);
   }];
 }
-def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "BitCast",
-                                 LLVM_AnyNonAggregate, LLVM_AnyNonAggregate> {
+def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "BitCast", LLVM_AnyNonAggregate,
+    LLVM_AnyNonAggregate, [DeclareOpInterfaceMethods<PromotableOpInterface>]> {
   let hasFolder = 1;
   let hasVerifier = 1;
 }
 def LLVM_AddrSpaceCastOp : LLVM_CastOp<"addrspacecast", "AddrSpaceCast",
-                                       LLVM_ScalarOrVectorOf<LLVM_AnyPointer>,
-                                       LLVM_ScalarOrVectorOf<LLVM_AnyPointer>> {
+    LLVM_ScalarOrVectorOf<LLVM_AnyPointer>,
+    LLVM_ScalarOrVectorOf<LLVM_AnyPointer>,
+    [DeclareOpInterfaceMethods<PromotableOpInterface>]> {
   let hasFolder = 1;
 }
 def LLVM_IntToPtrOp : LLVM_CastOp<"inttoptr", "IntToPtr",

diff  --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt
index 2cb1d489c4bfd..4e0f7ac5a0400 100644
--- a/mlir/include/mlir/Interfaces/CMakeLists.txt
+++ b/mlir/include/mlir/Interfaces/CMakeLists.txt
@@ -7,6 +7,7 @@ add_mlir_interface(DestinationStyleOpInterface)
 add_mlir_interface(InferIntRangeInterface)
 add_mlir_interface(InferTypeOpInterface)
 add_mlir_interface(LoopLikeInterface)
+add_mlir_interface(Mem2RegInterfaces)
 add_mlir_interface(ParallelCombiningOpInterface)
 add_mlir_interface(RuntimeVerifiableOpInterface)
 add_mlir_interface(ShapedOpInterfaces)

diff  --git a/mlir/include/mlir/Interfaces/Mem2RegInterfaces.h b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.h
new file mode 100644
index 0000000000000..c962d98624b42
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.h
@@ -0,0 +1,39 @@
+//===-- Mem2RegInterfaces.h - Mem2Reg interfaces ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_MEM2REGINTERFACES_H
+#define MLIR_INTERFACES_MEM2REGINTERFACES_H
+
+#include "mlir/IR/Dominance.h"
+#include "mlir/IR/OpDefinition.h"
+
+namespace mlir {
+
+/// Represents a slot in memory. This is generated by an allocating operation
+/// (for example alloca).
+struct MemorySlot {
+  /// Pointer to the memory slot, used by operations to refer to it.
+  Value ptr;
+  /// Type of the value contained in the slot.
+  Type elemType;
+};
+
+/// Returned by operation promotion logic to indicate whether the promoted
+/// operation should be kept or deleted.
+enum class DeletionKind {
+  /// Keep the operation after promotion.
+  Keep,
+  /// Delete the operation after promotion.
+  Delete,
+};
+
+} // namespace mlir
+
+#include "mlir/Interfaces/Mem2RegInterfaces.h.inc"
+
+#endif // MLIR_INTERFACES_MEM2REGINTERFACES_H

diff  --git a/mlir/include/mlir/Interfaces/Mem2RegInterfaces.td b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.td
new file mode 100644
index 0000000000000..b0d0a8cb54bdd
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.td
@@ -0,0 +1,196 @@
+//===-- Mem2RegInterfaces.td - Mem2Reg interfaces ----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_MEM2REGINTERFACES
+#define MLIR_INTERFACES_MEM2REGINTERFACES
+
+include "mlir/IR/OpBase.td"
+
+def PromotableAllocationOpInterface
+    : OpInterface<"PromotableAllocationOpInterface"> {
+  let description = [{
+    Describes an operation allocating a memory slot that can be promoted into
+    SSA values.
+  }];
+  let cppNamespace = "::mlir";
+
+  let methods = [
+    InterfaceMethod<[{
+        Returns a list of memory slots for which promotion should be attempted.
+        This only considers the local semantics of the allocator, ignoring
+        whether the slot pointer is properly used or not. This allocator is the
+        "owner" of the returned slots, meaning no two allocators should return
+        the same slot. The content of the memory slot must only be reachable
+        using loads and stores to the provided slot pointer; no aliasing is
+        allowed.
+
+        Promotion of the slot will lead to the slot pointer no longer being
+        used, leaving the content of the memory slot unreachable.
+      }], "::llvm::SmallVector<::mlir::MemorySlot>", "getPromotableSlots",
+      (ins)
+    >,
+    InterfaceMethod<[{
+        Provides the default Value of this memory slot. The provided Value
+        will be used as the reaching definition of loads done before any store.
+        This Value must outlive the promotion and dominate all the uses of this
+        slot's pointer. The provided builder can be used to create the default
+        value on the fly.
+
+        The builder is located at the beginning of the block where the slot
+        pointer is defined.
+      }], "::mlir::Value", "getDefaultValue",
+      (ins "const ::mlir::MemorySlot &":$slot, "::mlir::OpBuilder &":$builder)
+    >,
+    InterfaceMethod<[{
+        Hook triggered for every new block argument added to a block.
+        This will only be called for slots declared by this operation.
+
+        The builder is located at the beginning of the block on call.
+      }],
+      "void", "handleBlockArgument",
+      (ins
+        "const ::mlir::MemorySlot &":$slot,
+        "::mlir::BlockArgument":$argument,
+        "::mlir::OpBuilder &":$builder
+      )
+    >,
+    InterfaceMethod<[{
+        Hook triggered once the promotion of a slot is complete. This can
+        also clean up the created default value if necessary.
+        This will only be called for slots declared by this operation.
+      }],
+      "void", "handlePromotionComplete",
+      (ins "const ::mlir::MemorySlot &":$slot, "::mlir::Value":$defaultValue)
+    >,
+  ];
+}
+
+def PromotableMemOpInterface : OpInterface<"PromotableMemOpInterface"> {
+  let description = [{
+    Describes an operation that can load from memory slots and/or store
+    to memory slots. Loads and stores must be of whole values of the same
+    type as the slot itself.
+
+    If the same operation does both loads and stores on the same slot, the
+    load must semantically happen first.
+  }];
+  let cppNamespace = "::mlir";
+
+  let methods = [
+    InterfaceMethod<[{
+        Gets whether this operation loads from the specified slot.
+      }],
+      "bool", "loadsFrom",
+      (ins "const ::mlir::MemorySlot &":$slot)
+    >,
+    InterfaceMethod<[{
+        Gets the value stored to the provided memory slot, or returns a null
+        value if this operation does not store to this slot. An operation
+        storing a value to a slot must always be able to provide the value it
+        stores. This method is only called on operations that use the slot.
+      }],
+      "::mlir::Value", "getStored",
+      (ins "const ::mlir::MemorySlot &":$slot)
+    >,
+    InterfaceMethod<[{
+        Checks that this operation can be promoted to no longer use the provided
+        blocking uses, in the context of promoting `slot`.
+
+        If the removal procedure of the use will require that other uses get
+        removed, that dependency should be added to the `newBlockingUses`
+        argument. Dependent uses must only be uses of results of this operation.
+      }], "bool", "canUsesBeRemoved",
+      (ins "const ::mlir::MemorySlot &":$slot,
+           "const ::llvm::SmallPtrSetImpl<::mlir::OpOperand *> &":$blockingUses,
+           "::llvm::SmallVectorImpl<::mlir::OpOperand *> &":$newBlockingUses)
+    >,
+    InterfaceMethod<[{
+        Transforms IR to ensure that the current operation does not use the
+        provided memory slot anymore. `reachingDefinition` contains the value
+        currently stored in the provided memory slot, immediately before the
+        current operation.
+
+        During the transformation, *no operation should be deleted*.
+        The operation can only schedule its own deletion by returning the
+        appropriate `DeletionKind`. The deletion must be legal assuming the
+        blocking uses passed through the `newBlockingUses` list in
+        `canUsesBeRemoved` have been removed.
+
+        After calling this method, the blocking uses should have disappeared
+        or this operation should have scheduled its own deletion.
+
+        This method will only be called after ensuring promotion is allowed via
+        `canUsesBeRemoved`. The requested blocking use removal may or may not
+        have been done at the point of calling this method, but it will be done
+        eventually.
+
+        The builder is located after the promotable operation on call.
+      }],
+      "::mlir::DeletionKind",
+      "removeBlockingUses",
+      (ins "const ::mlir::MemorySlot &":$slot,
+           "const ::llvm::SmallPtrSetImpl<mlir::OpOperand *> &":$blockingUses,
+           "::mlir::OpBuilder &":$builder,
+           "::mlir::Value":$reachingDefinition)
+    >,
+  ];
+}
+
+def PromotableOpInterface : OpInterface<"PromotableOpInterface"> {
+  let description = [{
+    Describes an operation that can be transformed or deleted so it no longer
+    uses a provided value (blocking use), in case this would allow the promotion
+    of a memory slot.
+  }];
+  let cppNamespace = "::mlir";
+
+  let methods = [
+    InterfaceMethod<[{
+        Checks that this operation can be promoted to no longer use the provided
+        blocking uses, in the context of promoting `slot`.
+
+        If the removal procedure of the use will require that other uses get
+        removed, that dependency should be added to the `newBlockingUses`
+        argument. Dependent uses must only be uses of results of this operation.
+      }], "bool", "canUsesBeRemoved",
+      (ins "const ::mlir::MemorySlot &":$slot,
+           "const ::llvm::SmallPtrSetImpl<::mlir::OpOperand *> &":$blockingUses,
+           "::llvm::SmallVectorImpl<::mlir::OpOperand *> &":$newBlockingUses)
+    >,
+    InterfaceMethod<[{
+        Transforms IR to ensure that the current operation does not use the
+        provided memory slot anymore. In contrast to `PromotableMemOpInterface`,
+        operations implementing this interface must not require access to the
+        reaching definition of the content of the slot.
+
+        During the transformation, *no operation should be deleted*.
+        The operation can only schedule its own deletion by returning the
+        appropriate `DeletionKind`. The deletion must be legal assuming the
+        blocking uses passed through the `newBlockingUses` list in
+        `canUsesBeRemoved` have been removed.
+
+        After calling this method, the blocking uses should have disappeared
+        or this operation should have scheduled its own deletion.
+
+        This method will only be called after ensuring promotion is allowed via
+        `canUsesBeRemoved`. The requested blocking use removal may or may not
+        have been done at the point of calling this method, but it will be done
+        eventually.
+
+        The builder is located after the promotable operation on call.
+      }],
+      "::mlir::DeletionKind",
+      "removeBlockingUses",
+      (ins "const ::mlir::MemorySlot &":$slot,
+           "const ::llvm::SmallPtrSetImpl<mlir::OpOperand *> &":$blockingUses,
+           "::mlir::OpBuilder &":$builder)
+    >,
+  ];
+}
+
+#endif // MLIR_INTERFACES_MEM2REGINTERFACES

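To give a feel for the implementation burden, here is a condensed, hypothetical
sketch of a pointer-cast-like op opting into `PromotableOpInterface` by
forwarding its blocking uses to its users. `MyCastOp` is an invented name, but
the pattern mirrors the `LLVM::BitcastOp` reference implementation in
`LLVMMem2Reg.cpp` below (assuming `using namespace mlir;` and the usual
ODS-generated accessors):

    bool MyCastOp::canUsesBeRemoved(
        const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
        SmallVectorImpl<OpOperand *> &newBlockingUses) {
      // The cast can only disappear if everything consuming its result can
      // also stop using it, so forward the analysis to the users.
      for (OpOperand &use : getResult().getUses())
        newBlockingUses.push_back(&use);
      return true;
    }

    DeletionKind MyCastOp::removeBlockingUses(
        const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
        OpBuilder &builder) {
      // By the time this is called, the users have been rewired, so the cast
      // only needs to schedule its own deletion.
      return DeletionKind::Delete;
    }
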
diff  --git a/mlir/include/mlir/Transforms/Mem2Reg.h b/mlir/include/mlir/Transforms/Mem2Reg.h
new file mode 100644
index 0000000000000..e2da88f7e00c7
--- /dev/null
+++ b/mlir/include/mlir/Transforms/Mem2Reg.h
@@ -0,0 +1,26 @@
+//===-- Mem2Reg.h - Mem2Reg definitions -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TRANSFORMS_MEM2REG_H
+#define MLIR_TRANSFORMS_MEM2REG_H
+
+#include "mlir/IR/Dominance.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/Interfaces/Mem2RegInterfaces.h"
+
+namespace mlir {
+
+/// Attempts to promote the memory slots of the provided allocators. Succeeds if
+/// at least one memory slot was promoted.
+LogicalResult
+tryToPromoteMemorySlots(ArrayRef<PromotableAllocationOpInterface> allocators,
+                        OpBuilder &builder, DominanceInfo &dominance);
+
+} // namespace mlir
+
+#endif // MLIR_TRANSFORMS_MEM2REG_H

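A sketch of the intended call pattern, condensed from the Mem2Reg pass added in
`mlir/lib/Transforms/Mem2Reg.cpp` below; the fixpoint loop matters because
promoting one slot can unblock others:

    #include "mlir/Transforms/Mem2Reg.h"

    using namespace mlir;

    // Condensed driver sketch; assumes `region` has at least one block.
    static bool promoteAllSlots(Region &region) {
      OpBuilder builder(&region.front(), region.front().begin());
      bool changed = false;
      while (true) {
        // Promotion invalidates dominance information, so recompute it on
        // every round.
        DominanceInfo dominance;
        SmallVector<PromotableAllocationOpInterface> allocators;
        for (Block &block : region)
          for (Operation &op : block)
            if (auto allocator = dyn_cast<PromotableAllocationOpInterface>(op))
              allocators.push_back(allocator);
        // Succeeds if at least one slot was promoted; stop at the fixpoint.
        if (failed(tryToPromoteMemorySlots(allocators, builder, dominance)))
          return changed;
        changed = true;
      }
    }
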
diff  --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 1d95e256b9161..f5f76076c8e07 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -33,6 +33,7 @@ class GreedyRewriteConfig;
 #define GEN_PASS_DECL_CSEPASS
 #define GEN_PASS_DECL_INLINER
 #define GEN_PASS_DECL_LOOPINVARIANTCODEMOTION
+#define GEN_PASS_DECL_MEM2REG
 #define GEN_PASS_DECL_PRINTIRPASS
 #define GEN_PASS_DECL_PRINTOPSTATS
 #define GEN_PASS_DECL_STRIPDEBUGINFO

diff  --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index cecefb35466bc..1cc357ca1f9f4 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -172,6 +172,25 @@ def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
   let constructor = "mlir::createLoopInvariantCodeMotionPass()";
 }
 
+def Mem2Reg : Pass<"mem2reg"> {
+  let summary = "Promotes memory slots into values.";
+  let description = [{
+    This pass removes loads out of and stores into a memory slot, and turns
+    them into direct uses of SSA values. This is done generically using the
+    `PromotableAllocationOpInterface`, `PromotableOpInterface` and
+    `PromotableMemOpInterface` interfaces.
+
+    This pass will attempt to compute which definitions of the content of
+    the memory slot reach operations that use the memory slot pointer. It
+    will rewire or remove operations that use the slot pointer so they no
+    longer use it. If any of this is not possible, the IR will be left
+    unmodified.
+
+    This pass only supports unstructured control flow. Promotion of operations
+    within subregions will not happen.
+  }];
+}
+
 def PrintOpStats : Pass<"print-op-stats"> {
   let summary = "Print statistics of operations";
   let constructor = "mlir::createPrintOpStatsPass()";

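Since the pass is not anchored to a specific operation, it can be scheduled on
any op whose regions contain promotable allocations. The new tests exercise it
on LLVM functions with the following pipeline (quoted from the RUN line below):

    mlir-opt input.mlir --pass-pipeline='builtin.module(llvm.func(mem2reg))'
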
diff  --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
index 7e631e7ddc802..9ba03153536c6 100644
--- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
@@ -6,6 +6,7 @@ add_mlir_dialect_library(MLIRLLVMDialect
   IR/LLVMDialect.cpp
   IR/LLVMInlining.cpp
   IR/LLVMInterfaces.cpp
+  IR/LLVMMem2Reg.cpp
   IR/LLVMTypes.cpp
   IR/LLVMTypeSyntax.cpp
 
@@ -33,6 +34,7 @@ add_mlir_dialect_library(MLIRLLVMDialect
   MLIRDataLayoutInterfaces
   MLIRInferTypeOpInterface
   MLIRIR
+  MLIRMem2RegInterfaces
   MLIRSideEffectInterfaces
   MLIRSupport
   )

diff  --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp
new file mode 100644
index 0000000000000..7fa8ebceed5fb
--- /dev/null
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp
@@ -0,0 +1,210 @@
+//===- LLVMMem2Reg.cpp - Mem2Reg Interfaces ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Mem2Reg-related interfaces for LLVM dialect operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Interfaces/Mem2RegInterfaces.h"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// Interfaces for AllocaOp
+//===----------------------------------------------------------------------===//
+
+llvm::SmallVector<MemorySlot> LLVM::AllocaOp::getPromotableSlots() {
+  if (!getOperation()->getBlock()->isEntryBlock())
+    return {};
+
+  Type elemType =
+      getElemType() ? *getElemType() : getResult().getType().getElementType();
+  return {MemorySlot{getResult(), elemType}};
+}
+
+Value LLVM::AllocaOp::getDefaultValue(const MemorySlot &slot,
+                                      OpBuilder &builder) {
+  return builder.create<LLVM::UndefOp>(getLoc(), slot.elemType);
+}
+
+void LLVM::AllocaOp::handleBlockArgument(const MemorySlot &slot,
+                                         BlockArgument argument,
+                                         OpBuilder &builder) {
+  for (Operation *user : getOperation()->getUsers())
+    if (auto declareOp = llvm::dyn_cast<LLVM::DbgDeclareOp>(user))
+      builder.create<LLVM::DbgValueOp>(declareOp.getLoc(), argument,
+                                       declareOp.getVarInfo());
+}
+
+void LLVM::AllocaOp::handlePromotionComplete(const MemorySlot &slot,
+                                             Value defaultValue) {
+  if (defaultValue && defaultValue.use_empty())
+    defaultValue.getDefiningOp()->erase();
+  erase();
+}
+
+//===----------------------------------------------------------------------===//
+// Interfaces for LoadOp/StoreOp
+//===----------------------------------------------------------------------===//
+
+bool LLVM::LoadOp::loadsFrom(const MemorySlot &slot) {
+  return getAddr() == slot.ptr;
+}
+
+Value LLVM::LoadOp::getStored(const MemorySlot &slot) { return {}; }
+
+bool LLVM::StoreOp::loadsFrom(const MemorySlot &slot) { return false; }
+
+Value LLVM::StoreOp::getStored(const MemorySlot &slot) {
+  return getAddr() == slot.ptr ? getValue() : Value();
+}
+
+bool LLVM::LoadOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  if (blockingUses.size() != 1)
+    return false;
+  Value blockingUse = (*blockingUses.begin())->get();
+  // If the blocking use is the slot ptr itself, there will be enough
+  // context to reconstruct the result of the load at removal time, so it can
+  // be removed (provided it loads the exact stored value and is not
+  // volatile).
+  return blockingUse == slot.ptr && getAddr() == slot.ptr &&
+         getResult().getType() == slot.elemType && !getVolatile_();
+}
+
+DeletionKind LLVM::LoadOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder, Value reachingDefinition) {
+  // `canUsesBeRemoved` checked this blocking use must be the loaded slot
+  // pointer.
+  getResult().replaceAllUsesWith(reachingDefinition);
+  return DeletionKind::Delete;
+}
+
+bool LLVM::StoreOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  if (blockingUses.size() != 1)
+    return false;
+  Value blockingUse = (*blockingUses.begin())->get();
+  // If the blocking use is the slot ptr itself, dropping the store is
+  // fine, provided we are currently promoting its target value. Don't allow a
+  // store OF the slot pointer, only INTO the slot pointer.
+  return blockingUse == slot.ptr && getAddr() == slot.ptr &&
+         getValue() != slot.ptr && getValue().getType() == slot.elemType &&
+         !getVolatile_();
+}
+
+DeletionKind LLVM::StoreOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder, Value reachingDefinition) {
+  // `canUsesBeRemoved` checked this blocking use must be the stored slot
+  // pointer.
+  for (Operation *user : slot.ptr.getUsers())
+    if (auto declareOp = llvm::dyn_cast<LLVM::DbgDeclareOp>(user))
+      builder.create<LLVM::DbgValueOp>(declareOp->getLoc(), getValue(),
+                                       declareOp.getVarInfo());
+  return DeletionKind::Delete;
+}
+
+//===----------------------------------------------------------------------===//
+// Interfaces for discardable ops
+//===----------------------------------------------------------------------===//
+
+/// Conditions the deletion of the operation on the removal of all its uses.
+static bool forwardToUsers(Operation *op,
+                           SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  for (Value result : op->getResults())
+    for (OpOperand &use : result.getUses())
+      newBlockingUses.push_back(&use);
+  return true;
+}
+
+bool LLVM::BitcastOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  return forwardToUsers(*this, newBlockingUses);
+}
+
+DeletionKind LLVM::BitcastOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder) {
+  return DeletionKind::Delete;
+}
+
+bool LLVM::AddrSpaceCastOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  return forwardToUsers(*this, newBlockingUses);
+}
+
+DeletionKind LLVM::AddrSpaceCastOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder) {
+  return DeletionKind::Delete;
+}
+
+bool LLVM::LifetimeStartOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  return true;
+}
+
+DeletionKind LLVM::LifetimeStartOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder) {
+  return DeletionKind::Delete;
+}
+
+bool LLVM::LifetimeEndOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  return true;
+}
+
+DeletionKind LLVM::LifetimeEndOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder) {
+  return DeletionKind::Delete;
+}
+
+bool LLVM::DbgDeclareOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  return true;
+}
+
+DeletionKind LLVM::DbgDeclareOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder) {
+  return DeletionKind::Delete;
+}
+
+static bool hasAllZeroIndices(LLVM::GEPOp gepOp) {
+  return llvm::all_of(gepOp.getIndices(), [](auto index) {
+    auto indexAttr = index.template dyn_cast<IntegerAttr>();
+    return indexAttr && indexAttr.getValue() == 0;
+  });
+}
+
+bool LLVM::GEPOp::canUsesBeRemoved(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    SmallVectorImpl<OpOperand *> &newBlockingUses) {
+  // GEP can be removed as long as it is a no-op and its users can be removed.
+  if (!hasAllZeroIndices(*this))
+    return false;
+  return forwardToUsers(*this, newBlockingUses);
+}
+
+DeletionKind LLVM::GEPOp::removeBlockingUses(
+    const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+    OpBuilder &builder) {
+  return DeletionKind::Delete;
+}

diff  --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index 20073e7030557..dbf6e69a45255 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -9,6 +9,7 @@ set(LLVM_OPTIONAL_SOURCES
   InferIntRangeInterface.cpp
   InferTypeOpInterface.cpp
   LoopLikeInterface.cpp
+  Mem2RegInterfaces.cpp
   ParallelCombiningOpInterface.cpp
   RuntimeVerifiableOpInterface.cpp
   ShapedOpInterfaces.cpp
@@ -45,6 +46,7 @@ add_mlir_interface_library(DestinationStyleOpInterface)
 add_mlir_interface_library(InferIntRangeInterface)
 add_mlir_interface_library(InferTypeOpInterface)
 add_mlir_interface_library(LoopLikeInterface)
+add_mlir_interface_library(Mem2RegInterfaces)
 add_mlir_interface_library(ParallelCombiningOpInterface)
 add_mlir_interface_library(RuntimeVerifiableOpInterface)
 add_mlir_interface_library(ShapedOpInterfaces)

diff  --git a/mlir/lib/Interfaces/Mem2RegInterfaces.cpp b/mlir/lib/Interfaces/Mem2RegInterfaces.cpp
new file mode 100644
index 0000000000000..aadd76b44df53
--- /dev/null
+++ b/mlir/lib/Interfaces/Mem2RegInterfaces.cpp
@@ -0,0 +1,11 @@
+//===-- Mem2RegInterfaces.cpp - Mem2Reg interfaces --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/Mem2RegInterfaces.h"
+
+#include "mlir/Interfaces/Mem2RegInterfaces.cpp.inc"

diff  --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 4929a20847a78..7b4fb4d6df881 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -8,6 +8,7 @@ add_mlir_library(MLIRTransforms
   Inliner.cpp
   LocationSnapshot.cpp
   LoopInvariantCodeMotion.cpp
+  Mem2Reg.cpp
   OpStats.cpp
   PrintIR.cpp
   SCCP.cpp
@@ -27,6 +28,7 @@ add_mlir_library(MLIRTransforms
   MLIRAnalysis
   MLIRCopyOpInterface
   MLIRLoopLikeInterface
+  MLIRMem2RegInterfaces
   MLIRPass
   MLIRRuntimeVerifiableOpInterface
   MLIRSideEffectInterfaces

diff  --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
new file mode 100644
index 0000000000000..5952de9ddb63b
--- /dev/null
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -0,0 +1,562 @@
+//===- Mem2Reg.cpp - Promotes memory slots into values ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Transforms/Mem2Reg.h"
+#include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Transforms/Passes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/GenericIteratedDominanceFrontier.h"
+
+namespace mlir {
+#define GEN_PASS_DEF_MEM2REG
+#include "mlir/Transforms/Passes.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+
+/// mem2reg
+///
+/// This pass turns unnecessary uses of automatically allocated memory slots
+/// into direct Value-based operations. For example, storing a constant in a
+/// memory slot only to load it back immediately is simplified into a direct
+/// use of that constant. In other words, given a memory slot addressed by a
+/// non-aliased "pointer" Value, mem2reg removes all the uses of that pointer.
+///
+/// Within a block, this is done by following the chain of stores and loads of
+/// the slot and replacing the results of loads with the values previously
+/// stored. If a load happens before any store, a poison value is used
+/// instead.
+///
+/// Control flow can create situations where a load could be replaced by
+/// multiple possible stores depending on the control flow path taken. As a
+/// result, this pass must introduce new block arguments in some blocks to
+/// accommodate the multiple possible definitions. Each predecessor will
+/// populate the block argument with the definition reached at its end. With
+/// this, the value stored can be well defined at block boundaries, allowing
+/// the propagation of replacement through blocks.
+///
+/// This pass computes this transformation in four main steps:
+/// - A first step computes the list of operations that transitively use the
+/// memory slot we would like to promote. The purpose of this phase is to
+/// identify which uses must be removed to promote the slot, either by rewiring
+/// the user or deleting it. Naturally, direct uses of the slot must be removed.
+/// Sometimes additional uses must also be removed: this is notably the case
+/// when a direct user of the slot cannot rewire its use and must delete itself,
+/// and thus must make its users no longer use it. If any of those uses cannot
+/// be removed by their users in any way, promotion cannot continue: this is
+/// decided at this step.
+/// - A second step computes the list of blocks where a block argument will be
+/// needed ("merge points") without mutating the IR. These blocks are the blocks
+/// leading to a definition clash between two predecessors. Such blocks happen
+/// to be the Iterated Dominance Frontier (IDF) of the set of blocks containing
+/// a store, as they represent the point where a clear defining dominator stops
+/// existing. Computing this information in advance makes it possible to check
+/// that the terminators that will forward values are capable of doing so
+/// (inability to do so aborts promotion at this step).
+/// - A third step computes the reaching definition of the memory slot at each
+/// blocking user. This is the core of the mem2reg algorithm, also known as
+/// load-store forwarding. This analyses loads and stores and propagates which
+/// value must be stored in the slot at each blocking user. This is achieved by
+/// doing a depth-first walk of the dominator tree of the function. This is
+/// sufficient because the reaching definition at the beginning of a block is
+/// either its new block argument if it is a merge block, or the definition
+/// reaching the end of its immediate dominator (parent in the dominator tree).
+/// We can therefore propagate this information down the dominator tree to
+/// proceed with renaming within blocks.
+/// - The final fourth step uses the reaching definition to remove blocking uses
+/// in topological order.
+///
+/// The first two steps do not mutate IR because promotion can still be aborted
+/// at this point. Once the last two steps are reached, promotion is guaranteed
+/// to succeed, allowing IR mutation to start.
+///
+/// For further reading, chapter three of SSA-based Compiler Design [1]
+/// showcases SSA construction, where mem2reg is an adaptation of the same
+/// process.
+///
+/// [1]: Rastello F. & Bouchez Tichadou F., SSA-based Compiler Design (2022),
+///      Springer.
+
+namespace {
+
+/// The SlotPromoter handles the state of promoting a memory slot. It wraps a
+/// slot and its associated allocator, along with analysis results related to
+/// the slot.
+class SlotPromoter {
+public:
+  SlotPromoter(MemorySlot slot, PromotableAllocationOpInterface allocator,
+               OpBuilder &builder, DominanceInfo &dominance);
+
+  /// Prepare data for the promotion of the slot while checking if it can be
+  /// promoted. Succeeds if the slot can be promoted. This method does not
+  /// mutate IR.
+  LogicalResult prepareSlotPromotion();
+
+  /// Actually promotes the slot by mutating IR. This method must only be
+  /// called after a successful call to `SlotPromoter::prepareSlotPromotion`.
+  /// Promoting a slot does not invalidate the preparation of other slots.
+  void promoteSlot();
+
+private:
+  /// This is the first step of the promotion algorithm.
+  /// Computes the transitive uses of the slot that block promotion. This finds
+  /// uses that would block the promotion, checks that the operation has a
+  /// solution to remove the blocking use, and potentially forwards the analysis
+  /// if the operation needs further blocking uses resolved to resolve its own
+  /// uses (typically, removing its users because it will delete itself to
+  /// resolve its own blocking uses). This fails if one of the transitive
+  /// users cannot remove a requested use, in which case promotion is aborted.
+  LogicalResult computeBlockingUses();
+
+  /// Computes in which blocks the value stored in the slot is actually used,
+  /// meaning blocks leading to a load. This method uses `definingBlocks`, the
+  /// set of blocks containing a store to the slot (defining the value of the
+  /// slot).
+  SmallPtrSet<Block *, 16>
+  computeSlotLiveIn(SmallPtrSetImpl<Block *> &definingBlocks);
+
+  /// This is the second step of the promotion algorithm.
+  /// Computes the points in which multiple re-definitions of the slot's value
+  /// (stores) may conflict.
+  void computeMergePoints();
+
+  /// Ensures predecessors of merge points can properly provide their current
+  /// definition of the value stored in the slot to the merge point. This can
+  /// notably be an issue if the terminator used does not have the ability to
+  /// forward values through block operands.
+  bool areMergePointsUsable();
+
+  /// Computes the reaching definition for all the operations that require
+  /// promotion. `reachingDef` is the value the slot should contain at the
+  /// beginning of the block. This method returns the reached definition at the
+  /// end of the block.
+  Value computeReachingDefInBlock(Block *block, Value reachingDef);
+
+  /// This is the third step of the promotion algorithm.
+  /// Computes the reaching definition for all the operations that require
+  /// promotion. `reachingDef` corresponds to the initial value the
+  /// slot will contain before any write, typically a poison value.
+  void computeReachingDefInRegion(Region *region, Value reachingDef);
+
+  /// This is the fourth step of the promotion algorithm.
+  /// Removes the blocking uses of the slot, in topological order.
+  void removeBlockingUses();
+
+  /// Returns the lazily-constructed default value representing the content of
+  /// the slot before any store is executed. This method may mutate IR.
+  Value getLazyDefaultValue();
+
+  MemorySlot slot;
+  PromotableAllocationOpInterface allocator;
+  OpBuilder &builder;
+  /// Potentially non-initialized default value. Use `getLazyDefaultValue` to
+  /// initialize it on demand.
+  Value defaultValue;
+  /// Blocks where multiple definitions of the slot value clash.
+  SmallPtrSet<Block *, 8> mergePoints;
+  /// Contains, for each operation, which uses must be eliminated by promotion.
+  /// This is a DAG structure because an operation that must eliminate some of
+  /// its uses always comes from a request from an operation that must
+  /// eliminate some of its own uses.
+  DenseMap<Operation *, SmallPtrSet<OpOperand *, 4>> userToBlockingUses;
+  /// Contains the reaching definition at this operation. Reaching definitions
+  /// are only computed for promotable memory operations with blocking uses.
+  DenseMap<PromotableMemOpInterface, Value> reachingDefs;
+  DominanceInfo &dominance;
+};
+
+} // namespace
+
+SlotPromoter::SlotPromoter(MemorySlot slot,
+                           PromotableAllocationOpInterface allocator,
+                           OpBuilder &builder, DominanceInfo &dominance)
+    : slot(slot), allocator(allocator), builder(builder), dominance(dominance) {
+  bool isResultOrNewBlockArgument = slot.ptr.getDefiningOp() == allocator;
+  if (BlockArgument arg = slot.ptr.dyn_cast<BlockArgument>())
+    isResultOrNewBlockArgument = isResultOrNewBlockArgument ||
+                                 arg.getOwner()->getParentOp() == allocator;
+  (void)isResultOrNewBlockArgument;
+  assert(isResultOrNewBlockArgument &&
+         "a slot must be a result of the allocator or an argument of the child "
+         "regions of the allocator");
+}
+
+Value SlotPromoter::getLazyDefaultValue() {
+  if (defaultValue)
+    return defaultValue;
+
+  OpBuilder::InsertionGuard guard(builder);
+  builder.setInsertionPointToStart(slot.ptr.getParentBlock());
+  return defaultValue = allocator.getDefaultValue(slot, builder);
+}
+
+LogicalResult SlotPromoter::computeBlockingUses() {
+  // The promotion of an operation may require the promotion of further
+  // operations (typically, removing operations that use an operation that must
+  // delete itself). We thus need to start from the use of the slot pointer and
+  // propagate further requests through the forward slice.
+
+  // First insert that all immediate users of the slot pointer must no longer
+  // use it.
+  for (OpOperand &use : slot.ptr.getUses()) {
+    SmallPtrSet<OpOperand *, 4> &blockingUses =
+        userToBlockingUses.getOrInsertDefault(use.getOwner());
+    blockingUses.insert(&use);
+  }
+
+  // Then, propagate the requirements for the removal of uses. The
+  // topologically-sorted forward slice allows for all blocking uses of an
+  // operation to have been computed before we reach it. Operations are
+  // traversed in topological order of their uses, starting from the slot
+  // pointer.
+  SetVector<Operation *> forwardSlice;
+  mlir::getForwardSlice(slot.ptr, &forwardSlice);
+  for (Operation *user : forwardSlice) {
+    // If the next operation has no blocking uses, everything is fine.
+    if (!userToBlockingUses.contains(user))
+      continue;
+
+    SmallPtrSet<OpOperand *, 4> &blockingUses = userToBlockingUses[user];
+
+    SmallVector<OpOperand *> newBlockingUses;
+    // If the operation decides it cannot deal with removing the blocking uses,
+    // promotion must fail.
+    if (auto promotable = dyn_cast<PromotableOpInterface>(user)) {
+      if (!promotable.canUsesBeRemoved(slot, blockingUses, newBlockingUses))
+        return failure();
+    } else if (auto promotable = dyn_cast<PromotableMemOpInterface>(user)) {
+      if (!promotable.canUsesBeRemoved(slot, blockingUses, newBlockingUses))
+        return failure();
+    } else {
+      // An operation that has blocking uses must be promoted. If it is not
+      // promotable, promotion must fail.
+      return failure();
+    }
+
+    // Then, register any new blocking uses for coming operations.
+    for (OpOperand *blockingUse : newBlockingUses) {
+      assert(llvm::find(user->getResults(), blockingUse->get()) !=
+             user->result_end());
+
+      SmallPtrSetImpl<OpOperand *> &newUserBlockingUseSet =
+          userToBlockingUses.getOrInsertDefault(blockingUse->getOwner());
+      newUserBlockingUseSet.insert(blockingUse);
+    }
+  }
+
+  // Because this pass currently only supports analysing the parent region of
+  // the slot pointer, if a promotable memory op that needs promotion is
+  // outside of this region, promotion must fail because it will be impossible
+  // to provide a valid `reachingDef` for it.
+  for (auto &[toPromote, _] : userToBlockingUses)
+    if (isa<PromotableMemOpInterface>(toPromote) &&
+        toPromote->getParentRegion() != slot.ptr.getParentRegion())
+      return failure();
+
+  return success();
+}
+
+SmallPtrSet<Block *, 16>
+SlotPromoter::computeSlotLiveIn(SmallPtrSetImpl<Block *> &definingBlocks) {
+  SmallPtrSet<Block *, 16> liveIn;
+
+  // The worklist contains blocks in which it is known that the slot value is
+  // live-in. The further blocks where this value is live-in will be inferred
+  // from these.
+  SmallVector<Block *> liveInWorkList;
+
+  // Blocks with a load before any other store to the slot are the starting
+  // points of the analysis. The slot value is definitely live-in in those
+  // blocks.
+  SmallPtrSet<Block *, 16> visited;
+  for (Operation *user : slot.ptr.getUsers()) {
+    if (visited.contains(user->getBlock()))
+      continue;
+    visited.insert(user->getBlock());
+
+    for (Operation &op : user->getBlock()->getOperations()) {
+      if (auto memOp = dyn_cast<PromotableMemOpInterface>(op)) {
+        // If this operation loads the slot, it is loading from it before
+        // ever writing to it, so the value is live-in in this block.
+        if (memOp.loadsFrom(slot)) {
+          liveInWorkList.push_back(user->getBlock());
+          break;
+        }
+
+        // If we store to the slot, further loads will see that value.
+        // Because no load was encountered before it, the value is not live-in.
+        if (memOp.getStored(slot))
+          break;
+      }
+    }
+  }
+
+  // The information is then propagated to the predecessors until a def site
+  // (store) is found.
+  while (!liveInWorkList.empty()) {
+    Block *liveInBlock = liveInWorkList.pop_back_val();
+
+    if (!liveIn.insert(liveInBlock).second)
+      continue;
+
+    // If a predecessor is a defining block, either:
+    // - It has a load before its first store, in which case it is live-in but
+    // has already been processed in the initialisation step.
+    // - It has a store before any load, in which case it is not live-in.
+    // We can thus at this stage insert to the worklist only predecessors that
+    // are not defining blocks.
+    for (Block *pred : liveInBlock->getPredecessors())
+      if (!definingBlocks.contains(pred))
+        liveInWorkList.push_back(pred);
+  }
+
+  return liveIn;
+}
+
+using IDFCalculator = llvm::IDFCalculatorBase<Block, false>;
+void SlotPromoter::computeMergePoints() {
+  if (slot.ptr.getParentRegion()->hasOneBlock())
+    return;
+
+  IDFCalculator idfCalculator(dominance.getDomTree(slot.ptr.getParentRegion()));
+
+  SmallPtrSet<Block *, 16> definingBlocks;
+  for (Operation *user : slot.ptr.getUsers())
+    if (auto storeOp = dyn_cast<PromotableMemOpInterface>(user))
+      if (storeOp.getStored(slot))
+        definingBlocks.insert(user->getBlock());
+
+  idfCalculator.setDefiningBlocks(definingBlocks);
+
+  SmallPtrSet<Block *, 16> liveIn = computeSlotLiveIn(definingBlocks);
+  idfCalculator.setLiveInBlocks(liveIn);
+
+  SmallVector<Block *> mergePointsVec;
+  idfCalculator.calculate(mergePointsVec);
+
+  mergePoints.insert(mergePointsVec.begin(), mergePointsVec.end());
+}
+
+bool SlotPromoter::areMergePointsUsable() {
+  for (Block *mergePoint : mergePoints)
+    for (Block *pred : mergePoint->getPredecessors())
+      if (!isa<BranchOpInterface>(pred->getTerminator()))
+        return false;
+
+  return true;
+}
+
+Value SlotPromoter::computeReachingDefInBlock(Block *block, Value reachingDef) {
+  for (Operation &op : block->getOperations()) {
+    if (auto memOp = dyn_cast<PromotableMemOpInterface>(op)) {
+      if (userToBlockingUses.contains(memOp))
+        reachingDefs.insert({memOp, reachingDef});
+
+      if (Value stored = memOp.getStored(slot))
+        reachingDef = stored;
+    }
+  }
+
+  return reachingDef;
+}
+
+void SlotPromoter::computeReachingDefInRegion(Region *region,
+                                              Value reachingDef) {
+  if (region->hasOneBlock()) {
+    computeReachingDefInBlock(&region->front(), reachingDef);
+    return;
+  }
+
+  struct DfsJob {
+    llvm::DomTreeNodeBase<Block> *block;
+    Value reachingDef;
+  };
+
+  SmallVector<DfsJob> dfsStack;
+
+  auto &domTree = dominance.getDomTree(slot.ptr.getParentRegion());
+
+  dfsStack.emplace_back<DfsJob>(
+      {domTree.getNode(&region->front()), reachingDef});
+
+  while (!dfsStack.empty()) {
+    DfsJob job = dfsStack.pop_back_val();
+    Block *block = job.block->getBlock();
+
+    if (mergePoints.contains(block)) {
+      BlockArgument blockArgument =
+          block->addArgument(slot.elemType, slot.ptr.getLoc());
+      builder.setInsertionPointToStart(block);
+      allocator.handleBlockArgument(slot, blockArgument, builder);
+      job.reachingDef = blockArgument;
+    }
+
+    job.reachingDef = computeReachingDefInBlock(block, job.reachingDef);
+
+    if (auto terminator = dyn_cast<BranchOpInterface>(block->getTerminator())) {
+      for (BlockOperand &blockOperand : terminator->getBlockOperands()) {
+        if (mergePoints.contains(blockOperand.get())) {
+          if (!job.reachingDef)
+            job.reachingDef = getLazyDefaultValue();
+          terminator.getSuccessorOperands(blockOperand.getOperandNumber())
+              .append(job.reachingDef);
+        }
+      }
+    }
+
+    for (auto *child : job.block->children())
+      dfsStack.emplace_back<DfsJob>({child, job.reachingDef});
+  }
+}
+
+void SlotPromoter::removeBlockingUses() {
+  llvm::SetVector<Operation *> usersToRemoveUses;
+  for (auto &user : llvm::make_first_range(userToBlockingUses))
+    usersToRemoveUses.insert(user);
+  SetVector<Operation *> sortedUsersToRemoveUses =
+      mlir::topologicalSort(usersToRemoveUses);
+
+  llvm::SmallVector<Operation *> toErase;
+  for (Operation *toPromote : llvm::reverse(sortedUsersToRemoveUses)) {
+    if (auto toPromoteMemOp = dyn_cast<PromotableMemOpInterface>(toPromote)) {
+      Value reachingDef = reachingDefs.lookup(toPromoteMemOp);
+      // If no reaching definition is known, this use is outside the reach of
+      // the slot. The default value should thus be used.
+      if (!reachingDef)
+        reachingDef = getLazyDefaultValue();
+
+      builder.setInsertionPointAfter(toPromote);
+      if (toPromoteMemOp.removeBlockingUses(slot, userToBlockingUses[toPromote],
+                                            builder, reachingDef) ==
+          DeletionKind::Delete)
+        toErase.push_back(toPromote);
+
+      continue;
+    }
+
+    auto toPromoteBasic = cast<PromotableOpInterface>(toPromote);
+    builder.setInsertionPointAfter(toPromote);
+    if (toPromoteBasic.removeBlockingUses(slot, userToBlockingUses[toPromote],
+                                          builder) == DeletionKind::Delete)
+      toErase.push_back(toPromote);
+  }
+
+  for (Operation *toEraseOp : toErase)
+    toEraseOp->erase();
+
+  assert(slot.ptr.use_empty() &&
+         "after promotion, the slot pointer should not be used anymore");
+}
+
+void SlotPromoter::promoteSlot() {
+  computeReachingDefInRegion(slot.ptr.getParentRegion(), {});
+
+  // Now that reaching definitions are known, remove all users.
+  removeBlockingUses();
+
+  // Update terminators in dead branches to forward the default value if they
+  // are succeeded by a merge point.
+  for (Block *mergePoint : mergePoints) {
+    for (BlockOperand &use : mergePoint->getUses()) {
+      auto user = cast<BranchOpInterface>(use.getOwner());
+      SuccessorOperands succOperands =
+          user.getSuccessorOperands(use.getOperandNumber());
+      assert(succOperands.size() == mergePoint->getNumArguments() ||
+             succOperands.size() + 1 == mergePoint->getNumArguments());
+      if (succOperands.size() + 1 == mergePoint->getNumArguments())
+        succOperands.append(getLazyDefaultValue());
+    }
+  }
+
+  allocator.handlePromotionComplete(slot, defaultValue);
+}
+
+LogicalResult SlotPromoter::prepareSlotPromotion() {
+  // First, find the set of operations that will need to be changed for the
+  // promotion to happen. These operations need to resolve some of their uses,
+  // either by rewiring them or simply deleting themselves. If any of them
+  // cannot find a way to resolve their blocking uses, we abort the promotion.
+  if (failed(computeBlockingUses()))
+    return failure();
+
+  // Then, compute blocks in which two or more definitions of the allocated
+  // variable may conflict. These blocks will need a new block argument to
+  // accommodate this.
+  computeMergePoints();
+
+  // The slot can be promoted if the block arguments to be created can
+  // actually be populated with values, which may not be possible depending
+  // on their predecessors.
+  return success(areMergePointsUsable());
+}
+
+LogicalResult mlir::tryToPromoteMemorySlots(
+    ArrayRef<PromotableAllocationOpInterface> allocators, OpBuilder &builder,
+    DominanceInfo &dominance) {
+  // Actual promotion may invalidate the dominance analysis, so slot promotion
+  // is prepared in batches.
+  SmallVector<SlotPromoter> toPromote;
+  for (PromotableAllocationOpInterface allocator : allocators) {
+    for (MemorySlot slot : allocator.getPromotableSlots()) {
+      if (slot.ptr.use_empty())
+        continue;
+
+      SlotPromoter promoter(slot, allocator, builder, dominance);
+      if (succeeded(promoter.prepareSlotPromotion()))
+        toPromote.emplace_back(std::move(promoter));
+    }
+  }
+
+  for (SlotPromoter &promoter : toPromote)
+    promoter.promoteSlot();
+
+  return success(!toPromote.empty());
+}
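+
+// Note for callers: success is reported only if at least one slot was
+// promoted. Since promoting a slot can enable promoting others, callers such
+// as the Mem2Reg pass below can loop on this entry point until it fails.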
+
+namespace {
+
+struct Mem2Reg : impl::Mem2RegBase<Mem2Reg> {
+  void runOnOperation() override {
+    Operation *scopeOp = getOperation();
+    bool changed = false;
+
+    for (Region &region : scopeOp->getRegions()) {
+      if (region.getBlocks().empty())
+        continue;
+
+      OpBuilder builder(&region.front(), region.front().begin());
+
+      // Promoting a slot can enable the promotion of other slots, so
+      // promotion is retried until no promotion succeeds.
+      while (true) {
+        DominanceInfo &dominance = getAnalysis<DominanceInfo>();
+
+        SmallVector<PromotableAllocationOpInterface> allocators;
+        // Build a list of allocators whose slots we will attempt to promote.
+        for (Block &block : region)
+          for (Operation &op : block.getOperations())
+            if (auto allocator = dyn_cast<PromotableAllocationOpInterface>(op))
+              allocators.emplace_back(allocator);
+
+        // Attempt to promote the slots; exit the loop once no promotion
+        // succeeds.
+        if (failed(tryToPromoteMemorySlots(allocators, builder, dominance)))
+          break;
+
+        changed = true;
+        getAnalysisManager().invalidate({});
+      }
+    }
+
+    if (!changed)
+      markAllAnalysesPreserved();
+  }
+};
+
+} // namespace

diff  --git a/mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir b/mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir
new file mode 100644
index 0000000000000..d8d04dfcfec51
--- /dev/null
+++ b/mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir
@@ -0,0 +1,104 @@
+// RUN: mlir-opt %s --pass-pipeline='builtin.module(llvm.func(mem2reg))' | FileCheck %s
+
+llvm.func @use(i64)
+llvm.func @use_ptr(!llvm.ptr)
+
+#di_basic_type = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "ptr sized type", sizeInBits = 64>
+#di_file = #llvm.di_file<"test.ll" in "">
+#di_compile_unit = #llvm.di_compile_unit<sourceLanguage = DW_LANG_C_plus_plus_14, file = #di_file, producer = "clang", isOptimized = false, emissionKind = Full>
+#di_subprogram = #llvm.di_subprogram<compileUnit = #di_compile_unit, scope = #di_file, name = "blah", linkageName = "blah", file = #di_file, line = 7, subprogramFlags = Definition>
+// CHECK: #[[$VAR:.*]] = #llvm.di_local_variable<{{.*}}name = "ptr sized var"{{.*}}>
+#di_local_variable = #llvm.di_local_variable<scope = #di_subprogram, name = "ptr sized var", file = #di_file, line = 7, arg = 1, type = #di_basic_type>
+#di_local_variable_2 = #llvm.di_local_variable<scope = #di_subprogram, name = "ptr sized var 2", file = #di_file, line = 7, arg = 1, type = #di_basic_type>
+
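+// Across these tests, promotion rewrites llvm.intr.dbg.declare on a promoted
+// pointer into llvm.intr.dbg.value on the value the slot holds when such a
+// value is available, and simply drops the declare otherwise, so that debug
+// info survives mem2reg.
+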
+// CHECK-LABEL: llvm.func @basic_store_load
+llvm.func @basic_store_load(%arg0: i64) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-NOT: = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  // CHECK-NOT: llvm.store
+  llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  // CHECK-NOT: llvm.intr.dbg.declare
+  llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr
+  // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[LOADED:.*]] : i64
+  // CHECK-NOT: llvm.intr.dbg.value
+  // CHECK-NOT: llvm.intr.dbg.declare
+  // CHECK-NOT: llvm.store
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  // CHECK: llvm.return %[[LOADED]] : i64
+  llvm.return %2 : i64
+}
+
+// CHECK-LABEL: llvm.func @block_argument_value
+// CHECK-SAME: (%[[ARG0:.*]]: i64, {{.*}})
+llvm.func @block_argument_value(%arg0: i64, %arg1: i1) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-NOT: = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  // CHECK-NOT: llvm.intr.dbg.declare
+  llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr
+  llvm.cond_br %arg1, ^bb1, ^bb2
+// CHECK: ^{{.*}}:
+^bb1:
+  // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[ARG0]]
+  // CHECK-NOT: llvm.intr.dbg.value
+  llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  llvm.br ^bb2
+// CHECK: ^{{.*}}(%[[BLOCKARG:.*]]: i64):
+^bb2:
+  // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[BLOCKARG]]
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  llvm.return %2 : i64
+}
+
+// CHECK-LABEL: llvm.func @double_block_argument_value
+// CHECK-SAME: (%[[ARG0:.*]]: i64, {{.*}})
+llvm.func @double_block_argument_value(%arg0: i64, %arg1: i1) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-NOT: = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  // CHECK-NOT: llvm.intr.dbg.declare
+  llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr
+  llvm.cond_br %arg1, ^bb1, ^bb2
+// CHECK: ^{{.*}}(%[[BLOCKARG1:.*]]: i64):
+^bb1:
+  // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[BLOCKARG1]]
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  llvm.call @use(%2) : (i64) -> ()
+  // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[ARG0]]
+  llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  llvm.br ^bb2
+  // CHECK-NOT: llvm.intr.dbg.value
+// CHECK: ^{{.*}}(%[[BLOCKARG2:.*]]: i64):
+^bb2:
+  // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[BLOCKARG2]]
+  llvm.br ^bb1
+}
+
+// CHECK-LABEL: llvm.func @always_drop_promoted_declare
+// CHECK-NOT: = llvm.alloca
+// CHECK-NOT: llvm.intr.dbg.
+llvm.func @always_drop_promoted_declare() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @keep_dbg_if_not_promoted
+llvm.func @keep_dbg_if_not_promoted() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[ALLOCA:.*]] = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  // CHECK-NOT: = llvm.alloca
+  // CHECK-NOT: llvm.intr.dbg.declare
+  // CHECK: llvm.intr.dbg.declare #[[$VAR]] = %[[ALLOCA]]
+  // CHECK-NOT: = llvm.alloca
+  // CHECK-NOT: llvm.intr.dbg.declare
+  // CHECK: llvm.call @use_ptr(%[[ALLOCA]])
+  llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr
+  %2 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  llvm.intr.dbg.declare #di_local_variable_2 = %2 : !llvm.ptr
+  llvm.call @use_ptr(%1) : (!llvm.ptr) -> ()
+  llvm.return
+}

diff  --git a/mlir/test/Transforms/mem2reg-llvmir.mlir b/mlir/test/Transforms/mem2reg-llvmir.mlir
new file mode 100644
index 0000000000000..090f9133f7a96
--- /dev/null
+++ b/mlir/test/Transforms/mem2reg-llvmir.mlir
@@ -0,0 +1,685 @@
+// RUN: mlir-opt %s --pass-pipeline="builtin.module(llvm.func(mem2reg))" --split-input-file | FileCheck %s
+
+// CHECK-LABEL: llvm.func @default_value
+llvm.func @default_value() -> i32 {
+  // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[UNDEF]] : i32
+  llvm.return %2 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @store_of_ptr
+llvm.func @store_of_ptr() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(4 : i32) : i32
+  %2 = llvm.mlir.null : !llvm.ptr
+  // CHECK: %[[ALLOCA:.*]] = llvm.alloca
+  %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  // CHECK: llvm.store %{{.*}}, %[[ALLOCA]]
+  llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  // CHECK: llvm.store %[[ALLOCA]], %{{.*}}
+  llvm.store %3, %2 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @unreachable
+llvm.func @unreachable() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i32) : i32
+  // CHECK-NOT: = llvm.alloca
+  %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.return
+
+// CHECK: ^{{.*}}:
+// CHECK-NEXT: llvm.return
+^bb1:  // no predecessors
+  llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @unreachable_in_loop
+// CHECK-NOT: = llvm.alloca
+llvm.func @unreachable_in_loop() -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(6 : i32) : i32
+  %2 = llvm.mlir.constant(5 : i32) : i32
+  %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  // CHECK: llvm.br ^[[LOOP:.*]]
+  llvm.br ^bb1
+  
+// CHECK: ^[[LOOP]]:
+^bb1:  // 2 preds: ^bb0, ^bb3
+  // CHECK-NEXT: llvm.br ^[[ENDOFLOOP:.*]]
+  llvm.store %2, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb3
+
+// CHECK: ^[[UNREACHABLE:.*]]:
+^bb2:  // no predecessors
+  // CHECK-NEXT: llvm.br ^[[ENDOFLOOP]]
+  llvm.br ^bb3
+  
+// CHECK: ^[[ENDOFLOOP]]:
+^bb3:  // 2 preds: ^bb1, ^bb2
+  // CHECK-NEXT: llvm.br ^[[LOOP]]
+  llvm.br ^bb1
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @branching
+// CHECK-NOT: = llvm.alloca
+llvm.func @branching(%arg0: i1, %arg1: i1) -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i32) : i32
+  %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  // CHECK: llvm.cond_br %{{.*}}, ^[[BB2:.*]](%{{.*}} : i32), ^{{.*}}
+  llvm.cond_br %arg0, ^bb2, ^bb1
+^bb1:  // pred: ^bb0
+  llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+  // CHECK: llvm.cond_br %{{.*}}, ^[[BB2]](%{{.*}} : i32), ^[[BB2]](%{{.*}} : i32)
+  llvm.cond_br %arg1, ^bb2, ^bb2
+// CHECK: ^[[BB2]](%[[V3:.*]]: i32):
+^bb2:  // 3 preds: ^bb0, ^bb1, ^bb1
+  %3 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[V3]] : i32
+  llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @recursive_alloca
+// CHECK-NOT: = llvm.alloca
+llvm.func @recursive_alloca() -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i32) : i32
+  %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %4 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.store %3, %4 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr
+  %5 = llvm.load %4 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
+  %6 = llvm.load %5 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.store %6, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+  %7 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %7 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @reset_in_branch
+// CHECK-NOT: = llvm.alloca
+// CHECK-NOT: ^{{.*}}({{.*}}):
+llvm.func @reset_in_branch(%arg0: i32, %arg1: i1) {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(true) : i1
+  %2 = llvm.mlir.constant(false) : i1
+  %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.store %arg0, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.cond_br %arg1, ^bb1, ^bb2
+^bb1:  // pred: ^bb0
+  llvm.store %arg0, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  %4 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.call @reset_in_branch(%4, %2) : (i32, i1) -> ()
+  llvm.br ^bb3
+^bb2:  // pred: ^bb0
+  %5 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.call @reset_in_branch(%5, %1) : (i32, i1) -> ()
+  llvm.br ^bb3
+^bb3:  // 2 preds: ^bb1, ^bb2
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @intertwined_alloca
+// CHECK-NOT: = llvm.alloca
+llvm.func @intertwined_alloca(%arg0: !llvm.ptr, %arg1: i32) {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i32) : i32
+  %2 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %4 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %5 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %6 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.store %arg0, %2 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr
+  llvm.store %arg1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.store %1, %4 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb1
+^bb1:  // 2 preds: ^bb0, ^bb4
+  %7 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %8 = llvm.add %7, %0  : i32
+  %9 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %10 = llvm.icmp "sgt" %8, %9 : i32
+  %11 = llvm.zext %10 : i1 to i32
+  llvm.cond_br %10, ^bb2, ^bb5
+^bb2:  // pred: ^bb1
+  %12 = llvm.load %6 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.store %12, %5 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.store %1, %6 {alignment = 4 : i64} : i32, !llvm.ptr
+  %13 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %14 = llvm.icmp "sgt" %13, %1 : i32
+  %15 = llvm.zext %14 : i1 to i32
+  llvm.cond_br %14, ^bb3, ^bb4
+^bb3:  // pred: ^bb2
+  %16 = llvm.load %2 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
+  %17 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %18 = llvm.sub %17, %0  : i32
+  %19 = llvm.getelementptr %16[%18] : (!llvm.ptr, i32) -> !llvm.ptr, i8
+  %20 = llvm.load %5 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %21 = llvm.trunc %20 : i32 to i8
+  llvm.store %21, %19 {alignment = 1 : i64} : i8, !llvm.ptr
+  llvm.br ^bb4
+^bb4:  // 2 preds: ^bb2, ^bb3
+  %22 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+  %23 = llvm.add %22, %0  : i32
+  llvm.store %23, %4 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb1
+^bb5:  // pred: ^bb1
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @complex_cf
+// CHECK-NOT: = llvm.alloca
+llvm.func @complex_cf(%arg0: i32, ...) {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(false) : i1
+  %2 = llvm.mlir.constant(0 : i32) : i32
+  %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.cond_br %1, ^bb1, ^bb2
+^bb1:  // pred: ^bb0
+  llvm.br ^bb2
+^bb2:  // 2 preds: ^bb0, ^bb1
+  llvm.store %2, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb3
+^bb3:  // 2 preds: ^bb2, ^bb16
+  llvm.cond_br %1, ^bb4, ^bb17
+^bb4:  // pred: ^bb3
+  llvm.cond_br %1, ^bb5, ^bb14
+^bb5:  // pred: ^bb4
+  llvm.cond_br %1, ^bb7, ^bb6
+^bb6:  // pred: ^bb5
+  llvm.br ^bb7
+^bb7:  // 2 preds: ^bb5, ^bb6
+  llvm.cond_br %1, ^bb9, ^bb8
+^bb8:  // pred: ^bb7
+  llvm.br ^bb9
+^bb9:  // 2 preds: ^bb7, ^bb8
+  llvm.cond_br %1, ^bb11, ^bb10
+^bb10:  // pred: ^bb9
+  llvm.br ^bb11
+^bb11:  // 2 preds: ^bb9, ^bb10
+  llvm.cond_br %1, ^bb12, ^bb13
+^bb12:  // pred: ^bb11
+  llvm.br ^bb13
+^bb13:  // 2 preds: ^bb11, ^bb12
+  llvm.br ^bb14
+^bb14:  // 2 preds: ^bb4, ^bb13
+  llvm.cond_br %1, ^bb15, ^bb16
+^bb15:  // pred: ^bb14
+  llvm.br ^bb16
+^bb16:  // 2 preds: ^bb14, ^bb15
+  llvm.br ^bb3
+^bb17:  // pred: ^bb3
+  llvm.br ^bb20
+^bb18:  // no predecessors
+  %4 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.br ^bb24
+^bb19:  // no predecessors
+  llvm.br ^bb20
+^bb20:  // 2 preds: ^bb17, ^bb19
+  llvm.cond_br %1, ^bb21, ^bb22
+^bb21:  // pred: ^bb20
+  llvm.br ^bb23
+^bb22:  // pred: ^bb20
+  llvm.br ^bb23
+^bb23:  // 2 preds: ^bb21, ^bb22
+  llvm.br ^bb24
+^bb24:  // 2 preds: ^bb18, ^bb23
+  llvm.br ^bb26
+^bb25:  // no predecessors
+  llvm.br ^bb26
+^bb26:  // 2 preds: ^bb24, ^bb25
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @llvm_crash
+llvm.func @llvm_crash() -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i32) : i32
+  %2 = llvm.mlir.addressof @j : !llvm.ptr
+  %3 = llvm.mlir.constant(0 : i8) : i8
+  // CHECK-NOT: = llvm.alloca
+  // CHECK: %[[VOLATILE_ALLOCA:.*]] = llvm.alloca
+  // CHECK-NOT: = llvm.alloca
+  %4 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %5 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %6 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  %7 = llvm.bitcast %1 : i32 to i32
+  // CHECK: llvm.store volatile %{{.*}}, %[[VOLATILE_ALLOCA]]
+  llvm.store volatile %1, %5 {alignment = 4 : i64} : i32, !llvm.ptr
+  %8 = llvm.call @_setjmp(%2) : (!llvm.ptr) -> i32
+  %9 = llvm.icmp "ne" %8, %1 : i32
+  %10 = llvm.zext %9 : i1 to i8
+  %11 = llvm.icmp "ne" %10, %3 : i8
+  llvm.cond_br %11, ^bb1, ^bb2
+^bb1:  // pred: ^bb0
+  // CHECK: = llvm.load volatile %[[VOLATILE_ALLOCA]]
+  %12 = llvm.load volatile %5 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.store %12, %6 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb3
+^bb2:  // pred: ^bb0
+  // CHECK: llvm.store volatile %{{.*}}, %[[VOLATILE_ALLOCA]]
+  llvm.store volatile %0, %5 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.call @g() : () -> ()
+  llvm.store %1, %6 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb3
+^bb3:  // 2 preds: ^bb1, ^bb2
+  %13 = llvm.load %6 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.store %13, %4 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb4
+^bb4:  // pred: ^bb3
+  %14 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %14 : i32
+}
+llvm.mlir.global external @j() {addr_space = 0 : i32} : !llvm.array<1 x struct<"struct.__jmp_buf_tag", (array<6 x i32>, i32, struct<"struct.__sigset_t", (array<32 x i32>)>)>>
+llvm.func @_setjmp(!llvm.ptr) -> i32 attributes {passthrough = ["returns_twice"]}
+llvm.func @g()
+
+// -----
+
+// CHECK-LABEL: llvm.func amdgpu_kernelcc @addrspace_discard
+// CHECK-NOT: = llvm.alloca
+llvm.func amdgpu_kernelcc @addrspace_discard() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+  %3 = llvm.addrspacecast %2 : !llvm.ptr<5> to !llvm.ptr
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @ignore_atomic
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @ignore_atomic(%arg0: i32) -> i32 {
+  // CHECK-NOT: = llvm.alloca
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.store %arg0, %1 atomic seq_cst {alignment = 4 : i64} : i32, !llvm.ptr
+  %2 = llvm.load %1 atomic seq_cst {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[ARG0]] : i32
+  llvm.return %2 : i32
+}
+
+// -----
+
+// CHECK: llvm.func @landing_pad
+// CHECK-NOT: = llvm.alloca
+llvm.func @landing_pad() -> i32 attributes {personality = @__gxx_personality_v0} {
+  // CHECK-NOT: = llvm.alloca
+  // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+  // CHECK-NOT: = llvm.alloca
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  // CHECK: %[[V2:.*]] = llvm.invoke
+  %2 = llvm.invoke @landing_padf() to ^bb1 unwind ^bb3 : () -> i32
+// CHECK: ^{{.*}}:
+^bb1:// pred: ^bb0
+  llvm.store %2, %1 {alignment = 4 : i64} : i32, !llvm.ptr
+  // CHECK: llvm.br ^[[BB2:.*]](%[[V2]] : i32)
+  llvm.br ^bb2
+// CHECK: ^[[BB2]]([[V3:.*]]: i32):
+^bb2:// 2 preds: ^bb1, ^bb3
+  %3 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return [[V3]] : i32
+  llvm.return %3 : i32
+// CHECK: ^{{.*}}:
+^bb3:// pred: ^bb0
+  %4 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)>
+  // CHECK: llvm.br ^[[BB2:.*]](%[[UNDEF]] : i32)
+  llvm.br ^bb2
+}
+llvm.func @landing_padf() -> i32
+llvm.func @__gxx_personality_v0(...) -> i32
+
+// -----
+
+// CHECK-LABEL: llvm.func @unreachable_defines
+llvm.func @unreachable_defines() -> i32 {
+  // CHECK-NOT: = llvm.alloca
+  // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+  // CHECK-NOT: = llvm.alloca
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.br ^bb1
+^bb1:  // 2 preds: ^bb0, ^bb2
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[UNDEF]] : i32
+  llvm.return %2 : i32
+^bb2:  // no predecessors
+  %3 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.store %3, %1 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb1
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @unreachable_jumps_to_merge_point
+// CHECK-NOT: = llvm.alloca
+llvm.func @unreachable_jumps_to_merge_point(%arg0: i1) -> i32 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(6 : i32) : i32
+  %2 = llvm.mlir.constant(5 : i32) : i32
+  %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.cond_br %arg0, ^bb1, ^bb2
+^bb1:  // 2 preds: ^bb0, ^bb4
+  llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb4
+^bb2:  // pred: ^bb0
+  llvm.store %2, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.br ^bb4
+^bb3:  // no predecessors
+  llvm.br ^bb4
+^bb4:  // 3 preds: ^bb1, ^bb2, ^bb3
+  %4 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+  llvm.return %4 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @ignore_lifetime
+// CHECK-NOT: = llvm.alloca
+llvm.func @ignore_lifetime() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.intr.lifetime.start 2, %1 : !llvm.ptr
+  llvm.store %0, %1 {alignment = 4 : i64} : i32, !llvm.ptr
+  llvm.intr.lifetime.end 2, %1 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @ignore_discardable_tree
+// CHECK-NOT: = llvm.alloca
+llvm.func @ignore_discardable_tree() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i16) : i16
+  %2 = llvm.mlir.constant(0 : i8) : i8
+  %3 = llvm.mlir.undef : !llvm.struct<(i8, i16)>
+  %4 = llvm.insertvalue %2, %3[0] : !llvm.struct<(i8, i16)> 
+  %5 = llvm.insertvalue %1, %4[1] : !llvm.struct<(i8, i16)> 
+  %6 = llvm.alloca %0 x !llvm.struct<(i8, i16)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %7 = llvm.getelementptr %6[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i8, i16)>
+  llvm.intr.lifetime.start 2, %7 : !llvm.ptr
+  llvm.store %5, %6 {alignment = 2 : i64} : !llvm.struct<(i8, i16)>, !llvm.ptr
+  llvm.intr.lifetime.end 2, %7 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @store_load_forward
+llvm.func @store_load_forward() -> i32 {
+  // CHECK-NOT: = llvm.alloca
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[RES:.*]] = llvm.mlir.constant(0 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i32) : i32
+  %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+  %3 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: llvm.return %[[RES]] : i32
+  llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @store_load_wrong_type
+llvm.func @store_load_wrong_type() -> i16 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(0 : i32) : i32
+  // CHECK: = llvm.alloca
+  %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+  %3 = llvm.load %2 {alignment = 2 : i64} : !llvm.ptr -> i16
+  llvm.return %3 : i16
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @merge_point_cycle
+llvm.func @merge_point_cycle() {
+  // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(7 : i32) : i32
+  %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  // CHECK: llvm.br ^[[BB1:.*]](%[[UNDEF]] : i32)
+  llvm.br ^bb1
+// CHECK: ^[[BB1]](%[[BARG:.*]]: i32):
+^bb1:  // 2 preds: ^bb0, ^bb1
+  %3 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: = llvm.call @use(%[[BARG]])
+  %4 = llvm.call @use(%3) : (i32) -> i1
+  // CHECK: %[[DEF:.*]] = llvm.call @def
+  %5 = llvm.call @def(%1) : (i32) -> i32
+  llvm.store %5, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+  // CHECK: llvm.cond_br %{{.*}}, ^[[BB1]](%[[DEF]] : i32), ^{{.*}}
+  llvm.cond_br %4, ^bb1, ^bb2
+^bb2:  // pred: ^bb1
+  llvm.return
+}
+
+llvm.func @def(i32) -> i32
+llvm.func @use(i32) -> i1
+
+// -----
+
+// CHECK-LABEL: llvm.func @no_unnecessary_arguments
+llvm.func @no_unnecessary_arguments() {
+  // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+  // CHECK: llvm.br ^[[BB1:.*]]
+  llvm.br ^bb1
+// CHECK: ^[[BB1]]:
+^bb1:  // 2 preds: ^bb0, ^bb1
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: = llvm.call @use(%[[UNDEF]])
+  %3 = llvm.call @use(%2) : (i32) -> i1
+  // CHECK: llvm.cond_br %{{.*}}, ^[[BB1]], ^{{.*}}
+  llvm.cond_br %3, ^bb1, ^bb2
+^bb2:  // pred: ^bb1
+  llvm.return
+}
+
+llvm.func @use(i32) -> i1
+
+// -----
+
+// CHECK-LABEL: llvm.func @discardable_use_tree
+// CHECK-NOT: = llvm.alloca
+llvm.func @discardable_use_tree() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+  %4 = llvm.bitcast %3 : !llvm.ptr to !llvm.ptr
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @non_discardable_use_tree
+llvm.func @non_discardable_use_tree() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: = llvm.alloca
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+  %4 = llvm.bitcast %3 : !llvm.ptr to !llvm.ptr
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+  llvm.call @use(%4) : (!llvm.ptr) -> i1
+  llvm.return
+}
+llvm.func @use(!llvm.ptr) -> i1
+
+// -----
+
+// CHECK-LABEL: llvm.func @trivial_get_element_ptr
+// CHECK-NOT: = llvm.alloca
+llvm.func @trivial_get_element_ptr() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+  %4 = llvm.getelementptr %3[0, 0, 0] : (!llvm.ptr) -> !llvm.ptr, i8
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @nontrivial_get_element_ptr
+llvm.func @nontrivial_get_element_ptr() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: = llvm.alloca
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+  %4 = llvm.getelementptr %3[0, 1, 0] : (!llvm.ptr) -> !llvm.ptr, i8
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @dynamic_get_element_ptr
+llvm.func @dynamic_get_element_ptr() {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: = llvm.alloca
+  %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+  %4 = llvm.getelementptr %3[0, %0] : (!llvm.ptr, i32) -> !llvm.ptr, i8
+  llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+  llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+  llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @live_cycle
+// CHECK-SAME: (%[[ARG0:.*]]: i64, %{{.*}}: i1, %[[ARG2:.*]]: i64) -> i64
+llvm.func @live_cycle(%arg0: i64, %arg1: i1, %arg2: i64) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-NOT: = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  llvm.store %arg2, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]](%[[ARG2]] : i64), ^[[BB2:.*]](%[[ARG2]] : i64)
+  llvm.cond_br %arg1, ^bb1, ^bb2
+// CHECK: ^[[BB1]](%[[V1:.*]]: i64):
+^bb1:
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  // CHECK: llvm.call @use(%[[V1]])
+  llvm.call @use(%2) : (i64) -> ()
+  llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  // CHECK: llvm.br ^[[BB2]](%[[ARG0]] : i64)
+  llvm.br ^bb2
+// CHECK: ^[[BB2]](%[[V2:.*]]: i64):
+^bb2:
+  // CHECK: llvm.br ^[[BB1]](%[[V2]] : i64)
+  llvm.br ^bb1
+}
+
+llvm.func @use(i64)
+
+// -----
+
+// This test should no longer be an issue once promotion within subregions
+// is supported.
+// CHECK-LABEL: llvm.func @subregion_block_promotion
+// CHECK-SAME: (%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64) -> i64
+llvm.func @subregion_block_promotion(%arg0: i64, %arg1: i64) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[ALLOCA:.*]] = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  // CHECK: llvm.store %[[ARG1]], %[[ALLOCA]]
+  llvm.store %arg1, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  // CHECK: scf.execute_region {
+  scf.execute_region {
+    // CHECK: llvm.store %[[ARG0]], %[[ALLOCA]]
+    llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+    scf.yield
+  }
+  // CHECK: }
+  // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]]
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  // CHECK: llvm.return %[[RES]] : i64
+  llvm.return %2 : i64
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @subregion_simple_transitive_promotion
+// CHECK-SAME: (%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64) -> i64
+llvm.func @subregion_simple_transitive_promotion(%arg0: i64, %arg1: i64) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK-NOT: = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  llvm.store %arg1, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  // CHECK: scf.execute_region {
+  scf.execute_region {
+    // CHECK: llvm.call @use(%[[ARG1]])
+    llvm.call @use(%2) : (i64) -> ()
+    scf.yield
+  }
+  // CHECK: }
+  // CHECK: llvm.return %[[ARG1]] : i64
+  llvm.return %2 : i64
+}
+
+llvm.func @use(i64)
+
+// -----
+
+// This behavior is specific to the LLVM dialect: under LLVM semantics,
+// reaching an alloca multiple times allocates stack space multiple times.
+// Promoting an alloca that is reached multiple times could therefore change
+// observable behavior, so only allocas in the entry block are promoted.
+
+// CHECK-LABEL: llvm.func @no_inner_alloca_promotion
+// CHECK-SAME: (%[[ARG:.*]]: i64) -> i64
+llvm.func @no_inner_alloca_promotion(%arg: i64) -> i64 {
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  llvm.br ^bb1
+^bb1:
+  // CHECK: %[[ALLOCA:.*]] = llvm.alloca
+  %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+  // CHECK: llvm.store %[[ARG]], %[[ALLOCA]]
+  llvm.store %arg, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+  // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]]
+  %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+  // CHECK: llvm.return %[[RES]] : i64
+  llvm.return %2 : i64
+}
