[Mlir-commits] [mlir] f88f8fd - [mlir] Add a generic mem2reg implementation.
Tobias Gysi
llvmlistbot at llvm.org
Wed Apr 26 23:12:42 PDT 2023
Author: Théo Degioanni
Date: 2023-04-27T06:00:48Z
New Revision: f88f8fd0bca34dc8e5571cb828b1a159a50fd504
URL: https://github.com/llvm/llvm-project/commit/f88f8fd0bca34dc8e5571cb828b1a159a50fd504
DIFF: https://github.com/llvm/llvm-project/commit/f88f8fd0bca34dc8e5571cb828b1a159a50fd504.diff
LOG: [mlir] Add a generic mem2reg implementation.
This patch introduces a generic implementation of mem2reg on
unstructured control flow, along with a specialization for LLVM IR. This
is achieved by defining three new interfaces, representing 1. allocating
operations, 2. operations performing memory accesses, 3. operations that
can be rewired and/or deleted to remove a specific use.
The file containing the core implementation of the algorithm
(`Mem2Reg.cpp`) includes a detailed explanation of how the algorithm
works. The contract for this pass is that, given a memory slot with a
single non-aliased pointer, the pass will either remove all the uses of
the pointer or leave the IR unchanged.
To help review this patch, I recommend starting by looking at the
interfaces defined in `Mem2RegInterfaces.td`, along with their reference
implementation for LLVM IR defined in `LLVMMem2Reg.cpp`. Then, move on to
the core algorithm, implemented in `Mem2Reg.cpp`.
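To give a concrete feel for the transformation, here is a minimal
before/after sketch on LLVM dialect IR, distilled from the tests added
below (the now-dead constant is left for later cleanup):

  // Before mem2reg:
  llvm.func @basic_store_load(%arg0: i64) -> i64 {
    %0 = llvm.mlir.constant(1 : i32) : i32
    %1 = llvm.alloca %0 x i64 : (i32) -> !llvm.ptr
    llvm.store %arg0, %1 : i64, !llvm.ptr
    %2 = llvm.load %1 : !llvm.ptr -> i64
    llvm.return %2 : i64
  }

  // After mem2reg: the store/load pair is forwarded away.
  llvm.func @basic_store_load(%arg0: i64) -> i64 {
    %0 = llvm.mlir.constant(1 : i32) : i32
    llvm.return %arg0 : i64
  }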
If this is all good, I also have an implementation of the interfaces for
0-dimensional memref promotion that I can upstream afterwards.
Reviewed By: gysit
Differential Revision: https://reviews.llvm.org/D148109
Added:
mlir/include/mlir/Interfaces/Mem2RegInterfaces.h
mlir/include/mlir/Interfaces/Mem2RegInterfaces.td
mlir/include/mlir/Transforms/Mem2Reg.h
mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp
mlir/lib/Interfaces/Mem2RegInterfaces.cpp
mlir/lib/Transforms/Mem2Reg.cpp
mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir
mlir/test/Transforms/mem2reg-llvmir.mlir
Modified:
llvm/include/llvm/ADT/DenseMap.h
mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
mlir/include/mlir/Interfaces/CMakeLists.txt
mlir/include/mlir/Transforms/Passes.h
mlir/include/mlir/Transforms/Passes.td
mlir/lib/Dialect/LLVMIR/CMakeLists.txt
mlir/lib/Interfaces/CMakeLists.txt
mlir/lib/Transforms/CMakeLists.txt
Removed:
################################################################################
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index 8a680c031d302..3ef6a7cd1b4b5 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -312,6 +312,20 @@ class DenseMapBase : public DebugEpochBase {
insert(*I);
}
+ /// Returns a reference to the value associated with the key in the map if
+ /// it exists. If it does not exist, emplaces a default-constructed value
+ /// for the key and returns a reference to the newly created value.
+ ValueT &getOrInsertDefault(KeyT &&Key) {
+ return try_emplace(Key).first->second;
+ }
+
+ /// Returns a reference to the value associated with the key in the map if
+ /// it exists. If it does not exist, emplaces a default-constructed value
+ /// for the key and returns a reference to the newly created value.
+ ValueT &getOrInsertDefault(const KeyT &Key) {
+ return try_emplace(Key).first->second;
+ }
+
bool erase(const KeyT &Val) {
BucketT *TheBucket;
if (!LookupBucketFor(Val, TheBucket))
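For context, here is how the new `getOrInsertDefault` helper is used by the
promotion analysis further down in this patch (a sketch mirroring
`SlotPromoter::computeBlockingUses` in `Mem2Reg.cpp`):

  DenseMap<Operation *, SmallPtrSet<OpOperand *, 4>> userToBlockingUses;
  for (OpOperand &use : slot.ptr.getUses()) {
    // Value-initializes the set on first access, then returns a reference,
    // avoiding a separate lookup-then-insert dance.
    SmallPtrSet<OpOperand *, 4> &blockingUses =
        userToBlockingUses.getOrInsertDefault(use.getOwner());
    blockingUses.insert(&use);
  }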
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
index 9822c092ea473..728f95291a699 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
@@ -29,6 +29,7 @@
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Support/ThreadLocalCache.h"
+#include "mlir/Transforms/Mem2Reg.h"
#include "llvm/ADT/PointerEmbeddedInt.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index 6e35a3a9d2627..4b30d0c164c81 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -6,6 +6,7 @@ include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
include "mlir/Dialect/LLVMIR/LLVMEnums.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
+include "mlir/Interfaces/Mem2RegInterfaces.td"
// Operations that correspond to LLVM intrinsics. With MLIR operation set being
// extendable, there is no reason to introduce a hard boundary between "core"
@@ -214,7 +215,8 @@ def LLVM_NoAliasScopeDeclOp
/// Base operation for lifetime markers. The LLVM intrinsics require the size
/// operand to be an immediate. In MLIR it is encoded as an attribute.
-class LLVM_LifetimeBaseOp<string opName> : LLVM_ZeroResultIntrOp<opName> {
+class LLVM_LifetimeBaseOp<string opName> : LLVM_ZeroResultIntrOp<opName, [],
+ [DeclareOpInterfaceMethods<PromotableOpInterface>]> {
let arguments = (ins I64Attr:$size, LLVM_AnyPointer:$ptr);
// Custom builder to convert the size attribute to an integer.
@@ -322,7 +324,8 @@ def LLVM_CoroResumeOp : LLVM_IntrOp<"coro.resume", [], [], [], 0> {
// Debug function intrinsics.
//
-class LLVM_DbgIntrOp<string name, string argName> : LLVM_IntrOp<name, [], [], [], 0> {
+class LLVM_DbgIntrOp<string name, string argName, list<Trait> traits = []>
+ : LLVM_IntrOp<name, [], [], traits, 0> {
let llvmBuilder = [{
llvm::Module *module = builder.GetInsertBlock()->getModule();
llvm::LLVMContext &ctx = module->getContext();
@@ -363,7 +366,8 @@ class LLVM_DbgIntrOp<string name, string argName> : LLVM_IntrOp<name, [], [], []
}];
}
-def LLVM_DbgDeclareOp : LLVM_DbgIntrOp<"dbg.declare", "addr"> {
+def LLVM_DbgDeclareOp : LLVM_DbgIntrOp<"dbg.declare", "addr",
+ [DeclareOpInterfaceMethods<PromotableOpInterface>]> {
let summary = "Declare the address of a local debug info variable.";
let arguments = (ins LLVM_AnyPointer:$addr, LLVM_DILocalVariableAttr:$varInfo);
}
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 29b41ff122d97..f9025e01c1f69 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -22,6 +22,7 @@ include "mlir/IR/SymbolInterfaces.td"
include "mlir/Interfaces/CallInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
+include "mlir/Interfaces/Mem2RegInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
class LLVM_Builder<string builder> {
@@ -171,7 +172,9 @@ def LLVM_FNegOp : LLVM_UnaryFloatArithmeticOp<
LLVM_ScalarOrVectorOf<LLVM_AnyFloat>, "fneg", "FNeg">;
// Memory-related operations.
-def LLVM_AllocaOp : LLVM_Op<"alloca">, LLVM_MemOpPatterns {
+def LLVM_AllocaOp : LLVM_Op<"alloca",
+ [DeclareOpInterfaceMethods<PromotableAllocationOpInterface>]>,
+ LLVM_MemOpPatterns {
let arguments = (ins AnyInteger:$arraySize,
OptionalAttr<I64Attr>:$alignment,
OptionalAttr<TypeAttr>:$elem_type,
@@ -228,7 +231,8 @@ def LLVM_AllocaOp : LLVM_Op<"alloca">, LLVM_MemOpPatterns {
let hasVerifier = 1;
}
-def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure]> {
+def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure,
+ DeclareOpInterfaceMethods<PromotableOpInterface>]> {
let arguments = (ins LLVM_ScalarOrVectorOf<LLVM_AnyPointer>:$base,
Variadic<LLVM_ScalarOrVectorOf<AnyInteger>>:$dynamicIndices,
DenseI32ArrayAttr:$rawConstantIndices,
@@ -311,7 +315,8 @@ def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure]> {
let hasVerifier = 1;
}
-def LLVM_LoadOp : LLVM_MemAccessOpBase<"load"> {
+def LLVM_LoadOp : LLVM_MemAccessOpBase<"load",
+ [DeclareOpInterfaceMethods<PromotableMemOpInterface>]> {
dag args = (ins Arg<LLVM_PointerTo<LLVM_LoadableType>, "", [MemRead]>:$addr,
OptionalAttr<I64Attr>:$alignment,
UnitAttr:$volatile_,
@@ -382,7 +387,8 @@ def LLVM_LoadOp : LLVM_MemAccessOpBase<"load"> {
let hasVerifier = 1;
}
-def LLVM_StoreOp : LLVM_MemAccessOpBase<"store"> {
+def LLVM_StoreOp : LLVM_MemAccessOpBase<"store",
+ [DeclareOpInterfaceMethods<PromotableMemOpInterface>]> {
dag args = (ins LLVM_LoadableType:$value,
Arg<LLVM_PointerTo<LLVM_LoadableType>,"",[MemWrite]>:$addr,
OptionalAttr<I64Attr>:$alignment,
@@ -465,14 +471,15 @@ class LLVM_CastOp<string mnemonic, string instName, Type type,
$_location, $_resultType, $arg);
}];
}
-def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "BitCast",
- LLVM_AnyNonAggregate, LLVM_AnyNonAggregate> {
+def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "BitCast", LLVM_AnyNonAggregate,
+ LLVM_AnyNonAggregate, [DeclareOpInterfaceMethods<PromotableOpInterface>]> {
let hasFolder = 1;
let hasVerifier = 1;
}
def LLVM_AddrSpaceCastOp : LLVM_CastOp<"addrspacecast", "AddrSpaceCast",
- LLVM_ScalarOrVectorOf<LLVM_AnyPointer>,
- LLVM_ScalarOrVectorOf<LLVM_AnyPointer>> {
+ LLVM_ScalarOrVectorOf<LLVM_AnyPointer>,
+ LLVM_ScalarOrVectorOf<LLVM_AnyPointer>,
+ [DeclareOpInterfaceMethods<PromotableOpInterface>]> {
let hasFolder = 1;
}
def LLVM_IntToPtrOp : LLVM_CastOp<"inttoptr", "IntToPtr",
diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt
index 2cb1d489c4bfd..4e0f7ac5a0400 100644
--- a/mlir/include/mlir/Interfaces/CMakeLists.txt
+++ b/mlir/include/mlir/Interfaces/CMakeLists.txt
@@ -7,6 +7,7 @@ add_mlir_interface(DestinationStyleOpInterface)
add_mlir_interface(InferIntRangeInterface)
add_mlir_interface(InferTypeOpInterface)
add_mlir_interface(LoopLikeInterface)
+add_mlir_interface(Mem2RegInterfaces)
add_mlir_interface(ParallelCombiningOpInterface)
add_mlir_interface(RuntimeVerifiableOpInterface)
add_mlir_interface(ShapedOpInterfaces)
diff --git a/mlir/include/mlir/Interfaces/Mem2RegInterfaces.h b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.h
new file mode 100644
index 0000000000000..c962d98624b42
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.h
@@ -0,0 +1,39 @@
+//===-- Mem2RegInterfaces.h - Mem2Reg interfaces ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_MEM2REGINTERFACES_H
+#define MLIR_INTERFACES_MEM2REGINTERFACES_H
+
+#include "mlir/IR/Dominance.h"
+#include "mlir/IR/OpDefinition.h"
+
+namespace mlir {
+
+/// Represents a slot in memory. This is generated by an allocating operation
+/// (for example alloca).
+struct MemorySlot {
+ /// Pointer to the memory slot, used by operations to refer to it.
+ Value ptr;
+ /// Type of the value contained in the slot.
+ Type elemType;
+};
+
+/// Returned by operation promotion logic requesting the deletion of an
+/// operation.
+enum class DeletionKind {
+ /// Keep the operation after promotion.
+ Keep,
+ /// Delete the operation after promotion.
+ Delete,
+};
+
+} // namespace mlir
+
+#include "mlir/Interfaces/Mem2RegInterfaces.h.inc"
+
+#endif // MLIR_INTERFACES_MEM2REGINTERFACES_H
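For intuition, a `MemorySlot` is just the pointer/type pair an allocator
advertises. For instance, the LLVM dialect alloca implementation further down
effectively builds the following (sketch; `allocaOp` and `elemType` are
placeholder names):

  // For `%1 = llvm.alloca %0 x i64 : (i32) -> !llvm.ptr`, the promotable
  // slot pairs the pointer result %1 with the allocated element type i64.
  SmallVector<MemorySlot> slots = {MemorySlot{allocaOp.getResult(), elemType}};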
diff --git a/mlir/include/mlir/Interfaces/Mem2RegInterfaces.td b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.td
new file mode 100644
index 0000000000000..b0d0a8cb54bdd
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/Mem2RegInterfaces.td
@@ -0,0 +1,196 @@
+//===-- Mem2RegInterfaces.td - Mem2Reg interfaces ----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_MEM2REGINTERFACES
+#define MLIR_INTERFACES_MEM2REGINTERFACES
+
+include "mlir/IR/OpBase.td"
+
+def PromotableAllocationOpInterface
+ : OpInterface<"PromotableAllocationOpInterface"> {
+ let description = [{
+ Describes an operation allocating a memory slot that can be promoted into
+ SSA values.
+ }];
+ let cppNamespace = "::mlir";
+
+ let methods = [
+ InterfaceMethod<[{
+ Returns a list of memory slots for which promotion should be attempted.
+ This only considers the local semantics of the allocator, ignoring
+ whether the slot pointer is properly used or not. This allocator is the
+ "owner" of the returned slots, meaning no two allocators should return
+ the same slot. The content of the memory slot must only be reachable
+ using loads and stores to the provided slot pointer, no aliasing is
+ allowed.
+
+ Promotion of the slot will lead to the slot pointer no longer being
+ used, leaving the content of the memory slot unreachable.
+ }], "::llvm::SmallVector<::mlir::MemorySlot>", "getPromotableSlots",
+ (ins)
+ >,
+ InterfaceMethod<[{
+ Provides the default Value of this memory slot. The provided Value
+ will be used as the reaching definition of loads done before any store.
+ This Value must outlive the promotion and dominate all the uses of this
+ slot's pointer. The provided builder can be used to create the default
+ value on the fly.
+
+ The builder is located at the beginning of the block where the slot
+ pointer is defined.
+ }], "::mlir::Value", "getDefaultValue",
+ (ins "const ::mlir::MemorySlot &":$slot, "::mlir::OpBuilder &":$builder)
+ >,
+ InterfaceMethod<[{
+ Hook triggered for every new block argument added to a block.
+ This will only be called for slots declared by this operation.
+
+ The builder is located at the beginning of the block on call.
+ }],
+ "void", "handleBlockArgument",
+ (ins
+ "const ::mlir::MemorySlot &":$slot,
+ "::mlir::BlockArgument":$argument,
+ "::mlir::OpBuilder &":$builder
+ )
+ >,
+ InterfaceMethod<[{
+ Hook triggered once the promotion of a slot is complete. This can
+ also clean up the created default value if necessary.
+ This will only be called for slots declared by this operation.
+ }],
+ "void", "handlePromotionComplete",
+ (ins "const ::mlir::MemorySlot &":$slot, "::mlir::Value":$defaultValue)
+ >,
+ ];
+}
+
+def PromotableMemOpInterface : OpInterface<"PromotableMemOpInterface"> {
+ let description = [{
+ Describes an operation that can load from memory slots and/or store
+ to memory slots. Loads and stores must be of whole values of the same
+ type as the slot itself.
+
+ If the same operation does both loads and stores on the same slot, the
+ load must semantically happen first.
+ }];
+ let cppNamespace = "::mlir";
+
+ let methods = [
+ InterfaceMethod<[{
+ Gets whether this operation loads from the specified slot.
+ }],
+ "bool", "loadsFrom",
+ (ins "const ::mlir::MemorySlot &":$slot)
+ >,
+ InterfaceMethod<[{
+ Gets the value stored to the provided memory slot, or returns a null
+ value if this operation does not store to this slot. An operation
+ storing a value to a slot must always be able to provide the value it
+ stores. This method is only called on operations that use the slot.
+ }],
+ "::mlir::Value", "getStored",
+ (ins "const ::mlir::MemorySlot &":$slot)
+ >,
+ InterfaceMethod<[{
+ Checks that this operation can be promoted to no longer use the provided
+ blocking uses, in the context of promoting `slot`.
+
+ If the removal procedure of the use will require that other uses get
+ removed, that dependency should be added to the `newBlockingUses`
+ argument. Dependent uses must only be uses of results of this operation.
+ }], "bool", "canUsesBeRemoved",
+ (ins "const ::mlir::MemorySlot &":$slot,
+ "const ::llvm::SmallPtrSetImpl<::mlir::OpOperand *> &":$blockingUses,
+ "::llvm::SmallVectorImpl<::mlir::OpOperand *> &":$newBlockingUses)
+ >,
+ InterfaceMethod<[{
+ Transforms IR to ensure that the current operation does not use the
+ provided memory slot anymore. `reachingDefinition` contains the value
+ currently stored in the provided memory slot, immediately before the
+ current operation.
+
+ During the transformation, *no operation should be deleted*.
+ The operation can only schedule its own deletion by returning the
+ appropriate `DeletionKind`. The deletion must be legal assuming the
+ blocking uses passed through the `newBlockingUses` list in
+ `canUsesBeRemoved` have been removed.
+
+ After calling this method, the blocking uses should have disappeared
+ or this operation should have scheduled its own deletion.
+
+ This method will only be called after ensuring promotion is allowed via
+ `canUsesBeRemoved`. The requested blocking use removal may or may not
+ have been done at the point of calling this method, but it will be done
+ eventually.
+
+ The builder is located after the promotable operation on call.
+ }],
+ "::mlir::DeletionKind",
+ "removeBlockingUses",
+ (ins "const ::mlir::MemorySlot &":$slot,
+ "const ::llvm::SmallPtrSetImpl<mlir::OpOperand *> &":$blockingUses,
+ "::mlir::OpBuilder &":$builder,
+ "::mlir::Value":$reachingDefinition)
+ >,
+ ];
+}
+
+def PromotableOpInterface : OpInterface<"PromotableOpInterface"> {
+ let description = [{
+ Describes an operation that can be transformed or deleted so it no longer
+ uses a provided value (blocking use), in case this would allow the promotion
+ of a memory slot.
+ }];
+ let cppNamespace = "::mlir";
+
+ let methods = [
+ InterfaceMethod<[{
+ Checks that this operation can be promoted to no longer use the provided
+ blocking uses, in the context of promoting `slot`.
+
+ If the removal procedure of the use will require that other uses get
+ removed, that dependency should be added to the `newBlockingUses`
+ argument. Dependent uses must only be uses of results of this operation.
+ }], "bool", "canUsesBeRemoved",
+ (ins "const ::mlir::MemorySlot &":$slot,
+ "const ::llvm::SmallPtrSetImpl<::mlir::OpOperand *> &":$blockingUses,
+ "::llvm::SmallVectorImpl<::mlir::OpOperand *> &":$newBlockingUses)
+ >,
+ InterfaceMethod<[{
+ Transforms IR to ensure that the current operation does not use the
+ provided memory slot anymore. In contrast to `PromotableMemOpInterface`,
+ operations implementing this interface must not need access to the
+ reaching definition of the content of the slot.
+
+ During the transformation, *no operation should be deleted*.
+ The operation can only schedule its own deletion by returning the
+ appropriate `DeletionKind`. The deletion must be legal assuming the
+ blocking uses passed through the `newBlockingUses` list in
+ `canUsesBeRemoved` have been removed.
+
+ After calling this method, the blocking uses should have disappeared
+ or this operation should have scheduled its own deletion.
+
+ This method will only be called after ensuring promotion is allowed via
+ `canUsesBeRemoved`. The requested blocking use removal may or may not
+ have been done at the point of calling this method, but it will be done
+ eventually.
+
+ The builder is located after the promotable operation on call.
+ }],
+ "::mlir::DeletionKind",
+ "removeBlockingUses",
+ (ins "const ::mlir::MemorySlot &":$slot,
+ "const ::llvm::SmallPtrSetImpl<mlir::OpOperand *> &":$blockingUses,
+ "::mlir::OpBuilder &":$builder)
+ >,
+ ];
+}
+
+#endif // MLIR_INTERFACES_MEM2REGINTERFACES
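Downstream dialects opt into promotion the same way the LLVM dialect does in
this patch, by declaring the interface methods on their ops. A hypothetical
TableGen sketch (dialect and op names invented for illustration):

  include "mlir/Interfaces/Mem2RegInterfaces.td"

  // A no-op cast whose uses can be rewired away during promotion.
  def MyDialect_IdentityCastOp : MyDialect_Op<"identity_cast",
      [DeclareOpInterfaceMethods<PromotableOpInterface>]> {
    let arguments = (ins AnyType:$input);
    let results = (outs AnyType:$output);
  }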
diff --git a/mlir/include/mlir/Transforms/Mem2Reg.h b/mlir/include/mlir/Transforms/Mem2Reg.h
new file mode 100644
index 0000000000000..e2da88f7e00c7
--- /dev/null
+++ b/mlir/include/mlir/Transforms/Mem2Reg.h
@@ -0,0 +1,26 @@
+//===-- Mem2Reg.h - Mem2Reg definitions -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TRANSFORMS_MEM2REG_H
+#define MLIR_TRANSFORMS_MEM2REG_H
+
+#include "mlir/IR/Dominance.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/Interfaces/Mem2RegInterfaces.h"
+
+namespace mlir {
+
+/// Attempts to promote the memory slots of the provided allocators. Succeeds if
+/// at least one memory slot was promoted.
+LogicalResult
+tryToPromoteMemorySlots(ArrayRef<PromotableAllocationOpInterface> allocators,
+ OpBuilder &builder, DominanceInfo &dominance);
+
+} // namespace mlir
+
+#endif // MLIR_TRANSFORMS_MEM2REG_H
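A sketch of how a custom pass could drive this entry point, mirroring the
generic pass added in `Mem2Reg.cpp` below (`MyPromotionPass` is a
hypothetical name):

  void MyPromotionPass::runOnOperation() {
    DominanceInfo &dominance = getAnalysis<DominanceInfo>();
    // Collect all operations advertising promotable memory slots.
    SmallVector<PromotableAllocationOpInterface> allocators;
    getOperation()->walk([&](PromotableAllocationOpInterface allocator) {
      allocators.push_back(allocator);
    });
    Region &body = getOperation()->getRegion(0);
    OpBuilder builder(&body.front(), body.front().begin());
    // Succeeds iff at least one slot was promoted.
    if (failed(tryToPromoteMemorySlots(allocators, builder, dominance)))
      markAllAnalysesPreserved();
  }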
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 1d95e256b9161..f5f76076c8e07 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -33,6 +33,7 @@ class GreedyRewriteConfig;
#define GEN_PASS_DECL_CSEPASS
#define GEN_PASS_DECL_INLINER
#define GEN_PASS_DECL_LOOPINVARIANTCODEMOTION
+#define GEN_PASS_DECL_MEM2REG
#define GEN_PASS_DECL_PRINTIRPASS
#define GEN_PASS_DECL_PRINTOPSTATS
#define GEN_PASS_DECL_STRIPDEBUGINFO
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index cecefb35466bc..1cc357ca1f9f4 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -172,6 +172,25 @@ def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
let constructor = "mlir::createLoopInvariantCodeMotionPass()";
}
+def Mem2Reg : Pass<"mem2reg"> {
+ let summary = "Promotes memory slots into values.";
+ let description = [{
+ This pass removes loads out of and stores into a memory slot, and turns
+ them into direct uses of SSA values. This is done generically using the
+ `PromotableAllocationOpInterface`, `PromotableOpInterface` and
+ `PromotableMemOpInterface` interfaces.
+
+ This pass will attempt to compute which definitions of the content of
+ the memory slot reach operations that use the memory slot pointer. It
+ will rewire or remove operations that use the slot pointer so they no
+ longer use it. If any of this is not possible, the IR is left unchanged.
+
+ This pass only supports unstructured control flow. Promotion of operations
+ within subregions will not happen.
+ }];
+}
+
def PrintOpStats : Pass<"print-op-stats"> {
let summary = "Print statistics of operations";
let constructor = "mlir::createPrintOpStatsPass()";
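Once registered, the pass is scheduled like any other; the tests added below
run it on LLVM functions:

  mlir-opt input.mlir --pass-pipeline='builtin.module(llvm.func(mem2reg))'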
diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
index 7e631e7ddc802..9ba03153536c6 100644
--- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
@@ -6,6 +6,7 @@ add_mlir_dialect_library(MLIRLLVMDialect
IR/LLVMDialect.cpp
IR/LLVMInlining.cpp
IR/LLVMInterfaces.cpp
+ IR/LLVMMem2Reg.cpp
IR/LLVMTypes.cpp
IR/LLVMTypeSyntax.cpp
@@ -33,6 +34,7 @@ add_mlir_dialect_library(MLIRLLVMDialect
MLIRDataLayoutInterfaces
MLIRInferTypeOpInterface
MLIRIR
+ MLIRMem2RegInterfaces
MLIRSideEffectInterfaces
MLIRSupport
)
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp
new file mode 100644
index 0000000000000..7fa8ebceed5fb
--- /dev/null
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMem2Reg.cpp
@@ -0,0 +1,210 @@
+//===- LLVMMem2Reg.cpp - Mem2Reg Interfaces ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Mem2Reg-related interfaces for LLVM dialect operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Interfaces/Mem2RegInterfaces.h"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// Interfaces for AllocaOp
+//===----------------------------------------------------------------------===//
+
+llvm::SmallVector<MemorySlot> LLVM::AllocaOp::getPromotableSlots() {
+ if (!getOperation()->getBlock()->isEntryBlock())
+ return {};
+
+ Type elemType =
+ getElemType() ? *getElemType() : getResult().getType().getElementType();
+ return {MemorySlot{getResult(), elemType}};
+}
+
+Value LLVM::AllocaOp::getDefaultValue(const MemorySlot &slot,
+ OpBuilder &builder) {
+ return builder.create<LLVM::UndefOp>(getLoc(), slot.elemType);
+}
+
+void LLVM::AllocaOp::handleBlockArgument(const MemorySlot &slot,
+ BlockArgument argument,
+ OpBuilder &builder) {
+ for (Operation *user : getOperation()->getUsers())
+ if (auto declareOp = llvm::dyn_cast<LLVM::DbgDeclareOp>(user))
+ builder.create<LLVM::DbgValueOp>(declareOp.getLoc(), argument,
+ declareOp.getVarInfo());
+}
+
+void LLVM::AllocaOp::handlePromotionComplete(const MemorySlot &slot,
+ Value defaultValue) {
+ if (defaultValue && defaultValue.use_empty())
+ defaultValue.getDefiningOp()->erase();
+ erase();
+}
+
+//===----------------------------------------------------------------------===//
+// Interfaces for LoadOp/StoreOp
+//===----------------------------------------------------------------------===//
+
+bool LLVM::LoadOp::loadsFrom(const MemorySlot &slot) {
+ return getAddr() == slot.ptr;
+}
+
+Value LLVM::LoadOp::getStored(const MemorySlot &slot) { return {}; }
+
+bool LLVM::StoreOp::loadsFrom(const MemorySlot &slot) { return false; }
+
+Value LLVM::StoreOp::getStored(const MemorySlot &slot) {
+ return getAddr() == slot.ptr ? getValue() : Value();
+}
+
+bool LLVM::LoadOp::canUsesBeRemoved(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses) {
+ if (blockingUses.size() != 1)
+ return false;
+ Value blockingUse = (*blockingUses.begin())->get();
+ // If the blocking use is the slot ptr itself, there will be enough
+ // context to reconstruct the result of the load at removal time, so it can
+ // be removed (provided it loads the exact stored value and is not
+ // volatile).
+ return blockingUse == slot.ptr && getAddr() == slot.ptr &&
+ getResult().getType() == slot.elemType && !getVolatile_();
+}
+
+DeletionKind LLVM::LoadOp::removeBlockingUses(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ OpBuilder &builder, Value reachingDefinition) {
+ // `canUsesBeRemoved` checked this blocking use must be the loaded slot
+ // pointer.
+ getResult().replaceAllUsesWith(reachingDefinition);
+ return DeletionKind::Delete;
+}
+
+bool LLVM::StoreOp::canUsesBeRemoved(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses) {
+ if (blockingUses.size() != 1)
+ return false;
+ Value blockingUse = (*blockingUses.begin())->get();
+ // If the blocking use is the slot ptr itself, dropping the store is
+ // fine, provided we are currently promoting its target value. Don't allow a
+ // store OF the slot pointer, only INTO the slot pointer.
+ return blockingUse == slot.ptr && getAddr() == slot.ptr &&
+ getValue() != slot.ptr && getValue().getType() == slot.elemType &&
+ !getVolatile_();
+}
+
+DeletionKind LLVM::StoreOp::removeBlockingUses(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ OpBuilder &builder, Value reachingDefinition) {
+ // `canUsesBeRemoved` checked this blocking use must be the stored slot
+ // pointer.
+ for (Operation *user : slot.ptr.getUsers())
+ if (auto declareOp = llvm::dyn_cast<LLVM::DbgDeclareOp>(user))
+ builder.create<LLVM::DbgValueOp>(declareOp->getLoc(), getValue(),
+ declareOp.getVarInfo());
+ return DeletionKind::Delete;
+}
+
+//===----------------------------------------------------------------------===//
+// Interfaces for discardable ops
+//===----------------------------------------------------------------------===//
+
+/// Makes the deletion of the operation conditional on the removal of all its
+/// uses.
+static bool forwardToUsers(Operation *op,
+ SmallVectorImpl<OpOperand *> &newBlockingUses) {
+ for (Value result : op->getResults())
+ for (OpOperand &use : result.getUses())
+ newBlockingUses.push_back(&use);
+ return true;
+}
+
+bool LLVM::BitcastOp::canUsesBeRemoved(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses) {
+ return forwardToUsers(*this, newBlockingUses);
+}
+
+DeletionKind LLVM::BitcastOp::removeBlockingUses(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ OpBuilder &builder) {
+ return DeletionKind::Delete;
+}
+
+bool LLVM::AddrSpaceCastOp::canUsesBeRemoved(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses) {
+ return forwardToUsers(*this, newBlockingUses);
+}
+
+DeletionKind LLVM::AddrSpaceCastOp::removeBlockingUses(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ OpBuilder &builder) {
+ return DeletionKind::Delete;
+}
+
+bool LLVM::LifetimeStartOp::canUsesBeRemoved(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses) {
+ return true;
+}
+
+DeletionKind LLVM::LifetimeStartOp::removeBlockingUses(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ OpBuilder &builder) {
+ return DeletionKind::Delete;
+}
+
+bool LLVM::LifetimeEndOp::canUsesBeRemoved(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses) {
+ return true;
+}
+
+DeletionKind LLVM::LifetimeEndOp::removeBlockingUses(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ OpBuilder &builder) {
+ return DeletionKind::Delete;
+}
+
+bool LLVM::DbgDeclareOp::canUsesBeRemoved(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses) {
+ return true;
+}
+
+DeletionKind LLVM::DbgDeclareOp::removeBlockingUses(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ OpBuilder &builder) {
+ return DeletionKind::Delete;
+}
+
+static bool hasAllZeroIndices(LLVM::GEPOp gepOp) {
+ return llvm::all_of(gepOp.getIndices(), [](auto index) {
+ auto indexAttr = index.template dyn_cast<IntegerAttr>();
+ return indexAttr && indexAttr.getValue() == 0;
+ });
+}
+
+bool LLVM::GEPOp::canUsesBeRemoved(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses) {
+ // GEP can be removed as long as it is a no-op and its users can be removed.
+ if (!hasAllZeroIndices(*this))
+ return false;
+ return forwardToUsers(*this, newBlockingUses);
+}
+
+DeletionKind LLVM::GEPOp::removeBlockingUses(
+ const MemorySlot &slot, const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ OpBuilder &builder) {
+ return DeletionKind::Delete;
+}
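To see how the discardable-op hooks compose, consider a slot whose pointer
also escapes into a bitcast feeding lifetime markers (schematic IR, details
abbreviated). The markers agree to be deleted, which unblocks the bitcast,
leaving only the direct load/store pair, so the slot promotes:

  %1 = llvm.alloca %0 x i64 : (i32) -> !llvm.ptr
  %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
  llvm.intr.lifetime.start 8, %2 : !llvm.ptr  // deleted
  llvm.store %arg0, %1 : i64, !llvm.ptr       // deleted, value forwarded
  %3 = llvm.load %1 : !llvm.ptr -> i64        // replaced by %arg0
  llvm.intr.lifetime.end 8, %2 : !llvm.ptr    // deleted; then %2, then %1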
diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index 20073e7030557..dbf6e69a45255 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -9,6 +9,7 @@ set(LLVM_OPTIONAL_SOURCES
InferIntRangeInterface.cpp
InferTypeOpInterface.cpp
LoopLikeInterface.cpp
+ Mem2RegInterfaces.cpp
ParallelCombiningOpInterface.cpp
RuntimeVerifiableOpInterface.cpp
ShapedOpInterfaces.cpp
@@ -45,6 +46,7 @@ add_mlir_interface_library(DestinationStyleOpInterface)
add_mlir_interface_library(InferIntRangeInterface)
add_mlir_interface_library(InferTypeOpInterface)
add_mlir_interface_library(LoopLikeInterface)
+add_mlir_interface_library(Mem2RegInterfaces)
add_mlir_interface_library(ParallelCombiningOpInterface)
add_mlir_interface_library(RuntimeVerifiableOpInterface)
add_mlir_interface_library(ShapedOpInterfaces)
diff --git a/mlir/lib/Interfaces/Mem2RegInterfaces.cpp b/mlir/lib/Interfaces/Mem2RegInterfaces.cpp
new file mode 100644
index 0000000000000..aadd76b44df53
--- /dev/null
+++ b/mlir/lib/Interfaces/Mem2RegInterfaces.cpp
@@ -0,0 +1,11 @@
+//===-- Mem2RegInterfaces.cpp - Mem2Reg interfaces --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/Mem2RegInterfaces.h"
+
+#include "mlir/Interfaces/Mem2RegInterfaces.cpp.inc"
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 4929a20847a78..7b4fb4d6df881 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -8,6 +8,7 @@ add_mlir_library(MLIRTransforms
Inliner.cpp
LocationSnapshot.cpp
LoopInvariantCodeMotion.cpp
+ Mem2Reg.cpp
OpStats.cpp
PrintIR.cpp
SCCP.cpp
@@ -27,6 +28,7 @@ add_mlir_library(MLIRTransforms
MLIRAnalysis
MLIRCopyOpInterface
MLIRLoopLikeInterface
+ MLIRMem2RegInterfaces
MLIRPass
MLIRRuntimeVerifiableOpInterface
MLIRSideEffectInterfaces
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
new file mode 100644
index 0000000000000..5952de9ddb63b
--- /dev/null
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -0,0 +1,562 @@
+//===- Mem2Reg.cpp - Promotes memory slots into values ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Transforms/Mem2Reg.h"
+#include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Transforms/Passes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/GenericIteratedDominanceFrontier.h"
+
+namespace mlir {
+#define GEN_PASS_DEF_MEM2REG
+#include "mlir/Transforms/Passes.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+
+/// mem2reg
+///
+/// This pass turns unnecessary uses of automatically allocated memory slots
+/// into direct Value-based operations. For example, storing a constant into a
+/// memory slot only to immediately load it back simplifies to a direct use of
+/// that constant. In other words, given a memory slot addressed by a
+/// non-aliased "pointer" Value, mem2reg removes all the uses of that pointer.
+///
+/// Within a block, this is done by following the chain of stores and loads of
+/// the slot and replacing the results of loads with the values previously
+/// stored. If a load happens before any store, the slot's default value
+/// (typically a poison value) is used instead.
+///
+/// Control flow can create situations where a load could be replaced by
+/// multiple possible stores depending on the control flow path taken. As a
+/// result, this pass must introduce new block arguments in some blocks to
+/// accommodate the multiple possible definitions. Each predecessor will
+/// populate the block argument with the definition reached at its end. With
+/// this, the value stored is well defined at block boundaries, allowing
+/// the propagation of replacements across blocks.
+///
+/// This pass computes this transformation in four main steps:
+/// - A first step computes the list of operations that transitively use the
+/// memory slot we would like to promote. The purpose of this phase is to
+/// identify which uses must be removed to promote the slot, either by rewiring
+/// the user or deleting it. Naturally, direct uses of the slot must be removed.
+/// Sometimes additional uses must also be removed: this is notably the case
+/// when a direct user of the slot cannot rewire its use and must delete itself,
+/// and thus must make its users no longer use it. If any of those uses cannot
+/// be removed by their users in any way, promotion cannot continue: this is
+/// decided at this step.
+/// - A second step computes the list of blocks where a block argument will be
+/// needed ("merge points") without mutating the IR. These blocks are the blocks
+/// leading to a definition clash between two predecessors. Such blocks happen
+/// to be the Iterated Dominance Frontier (IDF) of the set of blocks containing
+/// a store, as they represent the point where a clear defining dominator stops
+/// existing. Computing this information in advance allows making sure the
+/// terminators that will forward values are capable of doing so (inability to
+/// do so aborts promotion at this step).
+/// - A third step computes the reaching definition of the memory slot at each
+/// blocking user. This is the core of the mem2reg algorithm, also known as
+/// load-store forwarding. This analyses loads and stores and propagates which
+/// value the slot contains at each blocking user. This is achieved by
+/// doing a depth-first walk of the dominator tree of the function. This is
+/// sufficient because the reaching definition at the beginning of a block is
+/// either its new block argument if it is a merge block, or the definition
+/// reaching the end of its immediate dominator (parent in the dominator tree).
+/// We can therefore propagate this information down the dominator tree to
+/// proceed with renaming within blocks.
+/// - The final fourth step uses the reaching definition to remove blocking uses
+/// in topological order.
+///
+/// The first two steps do not mutate IR because promotion can still be aborted
+/// at this point. Once the last two steps are reached, promotion is guaranteed
+/// to succeed, so mutating the IR can safely begin.
+///
+/// For further reading, chapter three of SSA-based Compiler Design [1]
+/// showcases SSA construction, where mem2reg is an adaptation of the same
+/// process.
+///
+/// [1]: Rastello F. & Bouchez Tichadou F., SSA-based Compiler Design (2022),
+/// Springer.
+
+namespace {
+
+/// The SlotPromoter handles the state of promoting a memory slot. It wraps a
+/// slot and its associated allocator, along with analysis results related to
+/// the slot.
+class SlotPromoter {
+public:
+ SlotPromoter(MemorySlot slot, PromotableAllocationOpInterface allocator,
+ OpBuilder &builder, DominanceInfo &dominance);
+
+ /// Prepares data for the promotion of the slot while checking if it can be
+ /// promoted. Succeeds if the slot can be promoted. This method does not
+ /// mutate IR.
+ LogicalResult prepareSlotPromotion();
+
+ /// Actually promotes the slot by mutating IR. This method must only be
+ /// called after a successful call to `SlotPromoter::prepareSlotPromotion`.
+ /// Promoting a slot does not invalidate the preparation of other slots.
+ void promoteSlot();
+
+private:
+ /// This is the first step of the promotion algorithm.
+ /// Computes the transitive uses of the slot that block promotion. This finds
+ /// uses that would block the promotion, checks that the operation has a
+ /// solution to remove the blocking use, and potentially forwards the analysis
+ /// if the operation needs further blocking uses resolved to resolve its own
+ /// uses (typically, removing its users because it will delete itself to
+ /// resolve its own blocking uses). This fails if one of the transitive
+ /// users cannot remove a requested use, which prevents promotion.
+ LogicalResult computeBlockingUses();
+
+ /// Computes in which blocks the value stored in the slot is actually used,
+ /// meaning blocks leading to a load. This method uses `definingBlocks`, the
+ /// set of blocks containing a store to the slot (defining the value of the
+ /// slot).
+ SmallPtrSet<Block *, 16>
+ computeSlotLiveIn(SmallPtrSetImpl<Block *> &definingBlocks);
+
+ /// This is the second step of the promotion algorithm.
+ /// Computes the points in which multiple re-definitions of the slot's value
+ /// (stores) may conflict.
+ void computeMergePoints();
+
+ /// Ensures predecessors of merge points can properly provide their current
+ /// definition of the value stored in the slot to the merge point. This can
+ /// notably be an issue if the terminator used does not have the ability to
+ /// forward values through block operands.
+ bool areMergePointsUsable();
+
+ /// Computes the reaching definition for all the operations that require
+ /// promotion within a block. `reachingDef` is the value the slot contains at
+ /// the beginning of the block. This method returns the reaching definition
+ /// at the end of the block.
+ Value computeReachingDefInBlock(Block *block, Value reachingDef);
+
+ /// This is the third step of the promotion algorithm.
+ /// Computes the reaching definition for all the operations that require
+ /// promotion. `reachingDef` corresponds to the initial value the
+ /// slot will contain before any write, typically a poison value.
+ void computeReachingDefInRegion(Region *region, Value reachingDef);
+
+ /// This is the fourth step of the promotion algorithm.
+ /// Removes the blocking uses of the slot, in topological order.
+ void removeBlockingUses();
+
+ /// Lazily constructs and returns the default value representing the content
+ /// of the slot before any store has been executed. This method may mutate IR.
+ Value getLazyDefaultValue();
+
+ MemorySlot slot;
+ PromotableAllocationOpInterface allocator;
+ OpBuilder &builder;
+ /// Potentially non-initialized default value. Use `getLazyDefaultValue` to
+ /// initialize it on demand.
+ Value defaultValue;
+ /// Blocks where multiple definitions of the slot value clash.
+ SmallPtrSet<Block *, 8> mergePoints;
+ /// Contains, for each operation, which uses must be eliminated by promotion.
+ /// This is a DAG structure because an operation that must eliminate some of
+ /// its uses always comes from a request from an operation that must
+ /// eliminate some of its own uses.
+ DenseMap<Operation *, SmallPtrSet<OpOperand *, 4>> userToBlockingUses;
+ /// Contains the reaching definition at this operation. Reaching definitions
+ /// are only computed for promotable memory operations with blocking uses.
+ DenseMap<PromotableMemOpInterface, Value> reachingDefs;
+ DominanceInfo &dominance;
+};
+
+} // namespace
+
+SlotPromoter::SlotPromoter(MemorySlot slot,
+ PromotableAllocationOpInterface allocator,
+ OpBuilder &builder, DominanceInfo &dominance)
+ : slot(slot), allocator(allocator), builder(builder), dominance(dominance) {
+ bool isResultOrNewBlockArgument = slot.ptr.getDefiningOp() == allocator;
+ if (BlockArgument arg = slot.ptr.dyn_cast<BlockArgument>())
+ isResultOrNewBlockArgument = isResultOrNewBlockArgument ||
+ arg.getOwner()->getParentOp() == allocator;
+ (void)isResultOrNewBlockArgument;
+ assert(isResultOrNewBlockArgument &&
+ "a slot must be a result of the allocator or an argument of the child "
+ "regions of the allocator");
+}
+
+Value SlotPromoter::getLazyDefaultValue() {
+ if (defaultValue)
+ return defaultValue;
+
+ OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPointToStart(slot.ptr.getParentBlock());
+ return defaultValue = allocator.getDefaultValue(slot, builder);
+}
+
+LogicalResult SlotPromoter::computeBlockingUses() {
+ // The promotion of an operation may require the promotion of further
+ // operations (typically, removing operations that use an operation that must
+ // delete itself). We thus need to start from the use of the slot pointer and
+ // propagate further requests through the forward slice.
+
+ // First, record that all immediate users of the slot pointer must no
+ // longer use it.
+ for (OpOperand &use : slot.ptr.getUses()) {
+ SmallPtrSet<OpOperand *, 4> &blockingUses =
+ userToBlockingUses.getOrInsertDefault(use.getOwner());
+ blockingUses.insert(&use);
+ }
+
+ // Then, propagate the requirements for the removal of uses. The
+ // topologically-sorted forward slice allows for all blocking uses of an
+ // operation to have been computed before we reach it. Operations are
+ // traversed in topological order of their uses, starting from the slot
+ // pointer.
+ SetVector<Operation *> forwardSlice;
+ mlir::getForwardSlice(slot.ptr, &forwardSlice);
+ for (Operation *user : forwardSlice) {
+ // If the next operation has no blocking uses, everything is fine.
+ if (!userToBlockingUses.contains(user))
+ continue;
+
+ SmallPtrSet<OpOperand *, 4> &blockingUses = userToBlockingUses[user];
+
+ SmallVector<OpOperand *> newBlockingUses;
+ // If the operation decides it cannot deal with removing the blocking uses,
+ // promotion must fail.
+ if (auto promotable = dyn_cast<PromotableOpInterface>(user)) {
+ if (!promotable.canUsesBeRemoved(slot, blockingUses, newBlockingUses))
+ return failure();
+ } else if (auto promotable = dyn_cast<PromotableMemOpInterface>(user)) {
+ if (!promotable.canUsesBeRemoved(slot, blockingUses, newBlockingUses))
+ return failure();
+ } else {
+ // An operation that has blocking uses must be promoted. If it is not
+ // promotable, promotion must fail.
+ return failure();
+ }
+
+ // Then, register any new blocking uses for operations further down the slice.
+ for (OpOperand *blockingUse : newBlockingUses) {
+ assert(llvm::find(user->getResults(), blockingUse->get()) !=
+ user->result_end());
+
+ SmallPtrSetImpl<OpOperand *> &newUserBlockingUseSet =
+ userToBlockingUses.getOrInsertDefault(blockingUse->getOwner());
+ newUserBlockingUseSet.insert(blockingUse);
+ }
+ }
+
+ // Because this pass currently only supports analysing the parent region of
+ // the slot pointer, if a promotable memory op that needs promotion is
+ // outside of this region, promotion must fail because it will be impossible
+ // to provide a valid `reachingDef` for it.
+ for (auto &[toPromote, _] : userToBlockingUses)
+ if (isa<PromotableMemOpInterface>(toPromote) &&
+ toPromote->getParentRegion() != slot.ptr.getParentRegion())
+ return failure();
+
+ return success();
+}
+
+SmallPtrSet<Block *, 16>
+SlotPromoter::computeSlotLiveIn(SmallPtrSetImpl<Block *> &definingBlocks) {
+ SmallPtrSet<Block *, 16> liveIn;
+
+ // The worklist contains blocks in which it is known that the slot value is
+ // live-in. Further blocks where this value is live-in will be inferred
+ // from these.
+ SmallVector<Block *> liveInWorkList;
+
+ // Blocks with a load before any other store to the slot are the starting
+ // points of the analysis. The slot value is definitely live-in in those
+ // blocks.
+ SmallPtrSet<Block *, 16> visited;
+ for (Operation *user : slot.ptr.getUsers()) {
+ if (visited.contains(user->getBlock()))
+ continue;
+ visited.insert(user->getBlock());
+
+ for (Operation &op : user->getBlock()->getOperations()) {
+ if (auto memOp = dyn_cast<PromotableMemOpInterface>(op)) {
+ // If this operation loads the slot, it is loading from it before
+ // ever writing to it, so the value is live-in in this block.
+ if (memOp.loadsFrom(slot)) {
+ liveInWorkList.push_back(user->getBlock());
+ break;
+ }
+
+ // If we store to the slot, further loads will see that value.
+ // Because we did not meet any load before, the value is not live-in.
+ if (memOp.getStored(slot))
+ break;
+ }
+ }
+ }
+
+ // The information is then propagated to the predecessors until a def site
+ // (store) is found.
+ while (!liveInWorkList.empty()) {
+ Block *liveInBlock = liveInWorkList.pop_back_val();
+
+ if (!liveIn.insert(liveInBlock).second)
+ continue;
+
+ // If a predecessor is a defining block, either:
+ // - It has a load before its first store, in which case it is live-in but
+ // has already been processed in the initialisation step.
+ // - It has a store before any load, in which case it is not live-in.
+ // We can thus at this stage insert to the worklist only predecessors that
+ // are not defining blocks.
+ for (Block *pred : liveInBlock->getPredecessors())
+ if (!definingBlocks.contains(pred))
+ liveInWorkList.push_back(pred);
+ }
+
+ return liveIn;
+}
+
+using IDFCalculator = llvm::IDFCalculatorBase<Block, false>;
+void SlotPromoter::computeMergePoints() {
+ if (slot.ptr.getParentRegion()->hasOneBlock())
+ return;
+
+ IDFCalculator idfCalculator(dominance.getDomTree(slot.ptr.getParentRegion()));
+
+ SmallPtrSet<Block *, 16> definingBlocks;
+ for (Operation *user : slot.ptr.getUsers())
+ if (auto storeOp = dyn_cast<PromotableMemOpInterface>(user))
+ if (storeOp.getStored(slot))
+ definingBlocks.insert(user->getBlock());
+
+ idfCalculator.setDefiningBlocks(definingBlocks);
+
+ SmallPtrSet<Block *, 16> liveIn = computeSlotLiveIn(definingBlocks);
+ idfCalculator.setLiveInBlocks(liveIn);
+
+ SmallVector<Block *> mergePointsVec;
+ idfCalculator.calculate(mergePointsVec);
+
+ mergePoints.insert(mergePointsVec.begin(), mergePointsVec.end());
+}
+
+bool SlotPromoter::areMergePointsUsable() {
+ for (Block *mergePoint : mergePoints)
+ for (Block *pred : mergePoint->getPredecessors())
+ if (!isa<BranchOpInterface>(pred->getTerminator()))
+ return false;
+
+ return true;
+}
+
+Value SlotPromoter::computeReachingDefInBlock(Block *block, Value reachingDef) {
+ for (Operation &op : block->getOperations()) {
+ if (auto memOp = dyn_cast<PromotableMemOpInterface>(op)) {
+ if (userToBlockingUses.contains(memOp))
+ reachingDefs.insert({memOp, reachingDef});
+
+ if (Value stored = memOp.getStored(slot))
+ reachingDef = stored;
+ }
+ }
+
+ return reachingDef;
+}
+
+void SlotPromoter::computeReachingDefInRegion(Region *region,
+ Value reachingDef) {
+ if (region->hasOneBlock()) {
+ computeReachingDefInBlock(®ion->front(), reachingDef);
+ return;
+ }
+
+ struct DfsJob {
+ llvm::DomTreeNodeBase<Block> *block;
+ Value reachingDef;
+ };
+
+ SmallVector<DfsJob> dfsStack;
+
+ auto &domTree = dominance.getDomTree(slot.ptr.getParentRegion());
+
+ dfsStack.emplace_back<DfsJob>(
+ {domTree.getNode(®ion->front()), reachingDef});
+
+ while (!dfsStack.empty()) {
+ DfsJob job = dfsStack.pop_back_val();
+ Block *block = job.block->getBlock();
+
+ if (mergePoints.contains(block)) {
+ BlockArgument blockArgument =
+ block->addArgument(slot.elemType, slot.ptr.getLoc());
+ builder.setInsertionPointToStart(block);
+ allocator.handleBlockArgument(slot, blockArgument, builder);
+ job.reachingDef = blockArgument;
+ }
+
+ job.reachingDef = computeReachingDefInBlock(block, job.reachingDef);
+
+ if (auto terminator = dyn_cast<BranchOpInterface>(block->getTerminator())) {
+ for (BlockOperand &blockOperand : terminator->getBlockOperands()) {
+ if (mergePoints.contains(blockOperand.get())) {
+ if (!job.reachingDef)
+ job.reachingDef = getLazyDefaultValue();
+ terminator.getSuccessorOperands(blockOperand.getOperandNumber())
+ .append(job.reachingDef);
+ }
+ }
+ }
+
+ for (auto *child : job.block->children())
+ dfsStack.emplace_back<DfsJob>({child, job.reachingDef});
+ }
+}
+
+void SlotPromoter::removeBlockingUses() {
+ llvm::SetVector<Operation *> usersToRemoveUses;
+ for (auto &user : llvm::make_first_range(userToBlockingUses))
+ usersToRemoveUses.insert(user);
+ SetVector<Operation *> sortedUsersToRemoveUses =
+ mlir::topologicalSort(usersToRemoveUses);
+
+ llvm::SmallVector<Operation *> toErase;
+ for (Operation *toPromote : llvm::reverse(sortedUsersToRemoveUses)) {
+ if (auto toPromoteMemOp = dyn_cast<PromotableMemOpInterface>(toPromote)) {
+ Value reachingDef = reachingDefs.lookup(toPromoteMemOp);
+ // If no reaching definition is known, this use is outside the reach of
+ // the slot. The default value should thus be used.
+ if (!reachingDef)
+ reachingDef = getLazyDefaultValue();
+
+ builder.setInsertionPointAfter(toPromote);
+ if (toPromoteMemOp.removeBlockingUses(slot, userToBlockingUses[toPromote],
+ builder, reachingDef) ==
+ DeletionKind::Delete)
+ toErase.push_back(toPromote);
+
+ continue;
+ }
+
+ auto toPromoteBasic = cast<PromotableOpInterface>(toPromote);
+ builder.setInsertionPointAfter(toPromote);
+ if (toPromoteBasic.removeBlockingUses(slot, userToBlockingUses[toPromote],
+ builder) == DeletionKind::Delete)
+ toErase.push_back(toPromote);
+ }
+
+ for (Operation *toEraseOp : toErase)
+ toEraseOp->erase();
+
+ assert(slot.ptr.use_empty() &&
+ "after promotion, the slot pointer should not be used anymore");
+}
+
+void SlotPromoter::promoteSlot() {
+ computeReachingDefInRegion(slot.ptr.getParentRegion(), {});
+
+ // Now that reaching definitions are known, remove all users.
+ removeBlockingUses();
+
+ // Update terminators in dead branches to forward the default value if they
+ // are succeeded by a merge point.
+ for (Block *mergePoint : mergePoints) {
+ for (BlockOperand &use : mergePoint->getUses()) {
+ auto user = cast<BranchOpInterface>(use.getOwner());
+ SuccessorOperands succOperands =
+ user.getSuccessorOperands(use.getOperandNumber());
+ assert(succOperands.size() == mergePoint->getNumArguments() ||
+ succOperands.size() + 1 == mergePoint->getNumArguments());
+ if (succOperands.size() + 1 == mergePoint->getNumArguments())
+ succOperands.append(getLazyDefaultValue());
+ }
+ }
+
+ allocator.handlePromotionComplete(slot, defaultValue);
+}
+
+LogicalResult SlotPromoter::prepareSlotPromotion() {
+ // First, find the set of operations that will need to be changed for the
+ // promotion to happen. These operations need to resolve some of their uses,
+ // either by rewiring them or simply deleting themselves. If any of them
+ // cannot find a way to resolve their blocking uses, we abort the promotion.
+ if (failed(computeBlockingUses()))
+ return failure();
+
+ // Then, compute blocks in which two or more definitions of the allocated
+ // variable may conflict. These blocks will need a new block argument to
+ // accommodate this.
+ computeMergePoints();
+
+ // The slot can be promoted if the block arguments to be created can
+ // actually be populated with values, which may not be possible depending
+ // on their predecessors.
+ return success(areMergePointsUsable());
+}
+
+LogicalResult mlir::tryToPromoteMemorySlots(
+ ArrayRef<PromotableAllocationOpInterface> allocators, OpBuilder &builder,
+ DominanceInfo &dominance) {
+ // Actual promotion may invalidate the dominance analysis, so slot promotion
+ // is prepared in batches.
+ SmallVector<SlotPromoter> toPromote;
+ for (PromotableAllocationOpInterface allocator : allocators) {
+ for (MemorySlot slot : allocator.getPromotableSlots()) {
+ if (slot.ptr.use_empty())
+ continue;
+
+ SlotPromoter promoter(slot, allocator, builder, dominance);
+ if (succeeded(promoter.prepareSlotPromotion()))
+ toPromote.emplace_back(std::move(promoter));
+ }
+ }
+
+ for (SlotPromoter &promoter : toPromote)
+ promoter.promoteSlot();
+
+ return success(!toPromote.empty());
+}
+
+namespace {
+
+struct Mem2Reg : impl::Mem2RegBase<Mem2Reg> {
+ void runOnOperation() override {
+ Operation *scopeOp = getOperation();
+ bool changed = false;
+
+ for (Region ®ion : scopeOp->getRegions()) {
+ if (region.getBlocks().empty())
+ continue;
+
+ OpBuilder builder(®ion.front(), region.front().begin());
+
+ // Promoting a slot can enable the promotion of other slots, so
+ // promotion is retried until no further promotion succeeds.
+ while (true) {
+ DominanceInfo &dominance = getAnalysis<DominanceInfo>();
+
+ SmallVector<PromotableAllocationOpInterface> allocators;
+ // Build a list of allocators whose slots we will attempt to promote.
+ for (Block &block : region)
+ for (Operation &op : block.getOperations())
+ if (auto allocator = dyn_cast<PromotableAllocationOpInterface>(op))
+ allocators.emplace_back(allocator);
+
+ // Stop iterating once promotion no longer succeeds.
+ if (failed(tryToPromoteMemorySlots(allocators, builder, dominance)))
+ break;
+
+ changed = true;
+ getAnalysisManager().invalidate({});
+ }
+ }
+
+ if (!changed)
+ markAllAnalysesPreserved();
+ }
+};
+
+} // namespace
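For the control-flow machinery (merge points, block arguments, default
values), the following sketch distilled from the tests below shows the net
effect (dead constant cleanup omitted):

  // Before mem2reg: the load in ^bb2 sees %arg0 or the initial slot content.
  llvm.func @block_argument_value(%arg0: i64, %arg1: i1) -> i64 {
    %0 = llvm.mlir.constant(1 : i32) : i32
    %1 = llvm.alloca %0 x i64 : (i32) -> !llvm.ptr
    llvm.cond_br %arg1, ^bb1, ^bb2
  ^bb1:
    llvm.store %arg0, %1 : i64, !llvm.ptr
    llvm.br ^bb2
  ^bb2:
    %2 = llvm.load %1 : !llvm.ptr -> i64
    llvm.return %2 : i64
  }

  // After mem2reg: ^bb2 is a merge point and gains a block argument; the
  // storeless path forwards the allocator's default value (here undef).
  llvm.func @block_argument_value(%arg0: i64, %arg1: i1) -> i64 {
    %undef = llvm.mlir.undef : i64
    llvm.cond_br %arg1, ^bb1, ^bb2(%undef : i64)
  ^bb1:
    llvm.br ^bb2(%arg0 : i64)
  ^bb2(%2: i64):
    llvm.return %2 : i64
  }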
diff --git a/mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir b/mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir
new file mode 100644
index 0000000000000..d8d04dfcfec51
--- /dev/null
+++ b/mlir/test/Transforms/mem2reg-llvmir-dbginfo.mlir
@@ -0,0 +1,104 @@
+// RUN: mlir-opt %s --pass-pipeline='builtin.module(llvm.func(mem2reg))' | FileCheck %s
+
+llvm.func @use(i64)
+llvm.func @use_ptr(!llvm.ptr)
+
+#di_basic_type = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "ptr sized type", sizeInBits = 64>
+#di_file = #llvm.di_file<"test.ll" in "">
+#di_compile_unit = #llvm.di_compile_unit<sourceLanguage = DW_LANG_C_plus_plus_14, file = #di_file, producer = "clang", isOptimized = false, emissionKind = Full>
+#di_subprogram = #llvm.di_subprogram<compileUnit = #di_compile_unit, scope = #di_file, name = "blah", linkageName = "blah", file = #di_file, line = 7, subprogramFlags = Definition>
+// CHECK: #[[$VAR:.*]] = #llvm.di_local_variable<{{.*}}name = "ptr sized var"{{.*}}>
+#di_local_variable = #llvm.di_local_variable<scope = #di_subprogram, name = "ptr sized var", file = #di_file, line = 7, arg = 1, type = #di_basic_type>
+#di_local_variable_2 = #llvm.di_local_variable<scope = #di_subprogram, name = "ptr sized var 2", file = #di_file, line = 7, arg = 1, type = #di_basic_type>
+
+// CHECK-LABEL: llvm.func @basic_store_load
+llvm.func @basic_store_load(%arg0: i64) -> i64 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: = llvm.alloca
+ %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ // CHECK-NOT: llvm.store
+ llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+ // CHECK-NOT: llvm.intr.dbg.declare
+ llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr
+ // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[LOADED:.*]] : i64
+ // CHECK-NOT: llvm.intr.dbg.value
+ // CHECK-NOT: llvm.intr.dbg.declare
+ // CHECK-NOT: llvm.store
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+ // CHECK: llvm.return %[[LOADED]] : i64
+ llvm.return %2 : i64
+}
+
+// CHECK-LABEL: llvm.func @block_argument_value
+// CHECK-SAME: (%[[ARG0:.*]]: i64, {{.*}})
+llvm.func @block_argument_value(%arg0: i64, %arg1: i1) -> i64 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: = llvm.alloca
+ %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ // CHECK-NOT: llvm.intr.dbg.declare
+ llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr
+ llvm.cond_br %arg1, ^bb1, ^bb2
+// CHECK: ^{{.*}}:
+^bb1:
+ // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[ARG0]]
+ // CHECK-NOT: llvm.intr.dbg.value
+ llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+ llvm.br ^bb2
+// CHECK: ^{{.*}}(%[[BLOCKARG:.*]]: i64):
+^bb2:
+ // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[BLOCKARG]]
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+ llvm.return %2 : i64
+}
+
+// CHECK-LABEL: llvm.func @double_block_argument_value
+// CHECK-SAME: (%[[ARG0:.*]]: i64, {{.*}})
+llvm.func @double_block_argument_value(%arg0: i64, %arg1: i1) -> i64 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: = llvm.alloca
+ %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ // CHECK-NOT: llvm.intr.dbg.declare
+ llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr
+ llvm.cond_br %arg1, ^bb1, ^bb2
+// CHECK: ^{{.*}}(%[[BLOCKARG1:.*]]: i64):
+^bb1:
+ // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[BLOCKARG1]]
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+ llvm.call @use(%2) : (i64) -> ()
+ // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[ARG0]]
+ llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+ llvm.br ^bb2
+ // CHECK-NOT: llvm.intr.dbg.value
+// CHECK: ^{{.*}}(%[[BLOCKARG2:.*]]: i64):
+^bb2:
+ // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[BLOCKARG2]]
+ llvm.br ^bb1
+}
+
+// CHECK-LABEL: llvm.func @always_drop_promoted_declare
+// CHECK-NOT: = llvm.alloca
+// CHECK-NOT: llvm.intr.dbg.
+llvm.func @always_drop_promoted_declare() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr
+ llvm.return
+}
+
+// CHECK-LABEL: llvm.func @keep_dbg_if_not_promoted
+llvm.func @keep_dbg_if_not_promoted() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: %[[ALLOCA:.*]] = llvm.alloca
+ %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ // CHECK-NOT: = llvm.alloca
+ // CHECK-NOT: llvm.intr.dbg.declare
+ // CHECK: llvm.intr.dbg.declare #[[$VAR]] = %[[ALLOCA]]
+ // CHECK-NOT: = llvm.alloca
+ // CHECK-NOT: llvm.intr.dbg.declare
+ // CHECK: llvm.call @use_ptr(%[[ALLOCA]])
+ llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr
+ %2 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ llvm.intr.dbg.declare #di_local_variable_2 = %2 : !llvm.ptr
+ llvm.call @use_ptr(%1) : (!llvm.ptr) -> ()
+ llvm.return
+}
diff --git a/mlir/test/Transforms/mem2reg-llvmir.mlir b/mlir/test/Transforms/mem2reg-llvmir.mlir
new file mode 100644
index 0000000000000..090f9133f7a96
--- /dev/null
+++ b/mlir/test/Transforms/mem2reg-llvmir.mlir
@@ -0,0 +1,685 @@
+// RUN: mlir-opt %s --pass-pipeline="builtin.module(llvm.func(mem2reg))" --split-input-file | FileCheck %s
+
+// CHECK-LABEL: llvm.func @default_value
+llvm.func @default_value() -> i32 {
+ // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[UNDEF]] : i32
+ llvm.return %2 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @store_of_ptr
+llvm.func @store_of_ptr() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(4 : i32) : i32
+ %2 = llvm.mlir.null : !llvm.ptr
+ // CHECK: %[[ALLOCA:.*]] = llvm.alloca
+ %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ // CHECK: llvm.store %{{.*}}, %[[ALLOCA]]
+ llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+ // CHECK: llvm.store %[[ALLOCA]], %{{.*}}
+ llvm.store %3, %2 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @unreachable
+llvm.func @unreachable() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(0 : i32) : i32
+ // CHECK-NOT: = llvm.alloca
+ %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.return
+
+// CHECK: ^{{.*}}:
+// CHECK-NEXT: llvm.return
+^bb1: // no predecessors
+ llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @unreachable_in_loop
+// CHECK-NOT: = llvm.alloca
+llvm.func @unreachable_in_loop() -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(6 : i32) : i32
+ %2 = llvm.mlir.constant(5 : i32) : i32
+ %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+ // CHECK: llvm.br ^[[LOOP:.*]]
+ llvm.br ^bb1
+
+// CHECK: ^[[LOOP]]:
+^bb1: // 2 preds: ^bb0, ^bb3
+ // CHECK-NEXT: llvm.br ^[[ENDOFLOOP:.*]]
+ llvm.store %2, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.br ^bb3
+
+// CHECK: ^[[UNREACHABLE:.*]]:
+^bb2: // no predecessors
+ // CHECK-NEXT: llvm.br ^[[ENDOFLOOP]]
+ llvm.br ^bb3
+
+// CHECK: ^[[ENDOFLOOP]]:
+^bb3: // 2 preds: ^bb1, ^bb2
+ // CHECK-NEXT: llvm.br ^[[LOOP]]
+ llvm.br ^bb1
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @branching
+// CHECK-NOT: = llvm.alloca
+llvm.func @branching(%arg0: i1, %arg1: i1) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(2 : i32) : i32
+ %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ // CHECK: llvm.cond_br %{{.*}}, ^[[BB2:.*]](%{{.*}} : i32), ^{{.*}}
+ llvm.cond_br %arg0, ^bb2, ^bb1
+^bb1: // pred: ^bb0
+ llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+ // CHECK: llvm.cond_br %{{.*}}, ^[[BB2]](%{{.*}} : i32), ^[[BB2]](%{{.*}} : i32)
+ llvm.cond_br %arg1, ^bb2, ^bb2
+// CHECK: ^[[BB2]](%[[V3:.*]]: i32):
+^bb2: // 3 preds: ^bb0, ^bb1, ^bb1
+ %3 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[V3]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @recursive_alloca
+// CHECK-NOT: = llvm.alloca
+llvm.func @recursive_alloca() -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(0 : i32) : i32
+ %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %4 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.store %3, %4 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr
+ %5 = llvm.load %4 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
+ %6 = llvm.load %5 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.store %6, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+ %7 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.return %7 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @reset_in_branch
+// CHECK-NOT: = llvm.alloca
+// CHECK-NOT: ^{{.*}}({{.*}}):
+llvm.func @reset_in_branch(%arg0: i32, %arg1: i1) {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(true) : i1
+ %2 = llvm.mlir.constant(false) : i1
+ %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.store %arg0, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.cond_br %arg1, ^bb1, ^bb2
+^bb1: // pred: ^bb0
+ llvm.store %arg0, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+ %4 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.call @reset_in_branch(%4, %2) : (i32, i1) -> ()
+ llvm.br ^bb3
+^bb2: // pred: ^bb0
+ %5 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.call @reset_in_branch(%5, %1) : (i32, i1) -> ()
+ llvm.br ^bb3
+^bb3: // 2 preds: ^bb1, ^bb2
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @intertwined_alloca
+// CHECK-NOT: = llvm.alloca
+llvm.func @intertwined_alloca(%arg0: !llvm.ptr, %arg1: i32) {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(0 : i32) : i32
+ %2 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %4 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %5 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %6 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.store %arg0, %2 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr
+ llvm.store %arg1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.store %1, %4 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.br ^bb1
+^bb1: // 2 preds: ^bb0, ^bb4
+ %7 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+ %8 = llvm.add %7, %0 : i32
+ %9 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+ %10 = llvm.icmp "sgt" %8, %9 : i32
+ %11 = llvm.zext %10 : i1 to i32
+ llvm.cond_br %10, ^bb2, ^bb5
+^bb2: // pred: ^bb1
+ %12 = llvm.load %6 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.store %12, %5 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.store %1, %6 {alignment = 4 : i64} : i32, !llvm.ptr
+ %13 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+ %14 = llvm.icmp "sgt" %13, %1 : i32
+ %15 = llvm.zext %14 : i1 to i32
+ llvm.cond_br %14, ^bb3, ^bb4
+^bb3: // pred: ^bb2
+ %16 = llvm.load %2 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
+ %17 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+ %18 = llvm.sub %17, %0 : i32
+ %19 = llvm.getelementptr %16[%18] : (!llvm.ptr, i32) -> !llvm.ptr, i8
+ %20 = llvm.load %5 {alignment = 4 : i64} : !llvm.ptr -> i32
+ %21 = llvm.trunc %20 : i32 to i8
+ llvm.store %21, %19 {alignment = 1 : i64} : i8, !llvm.ptr
+ llvm.br ^bb4
+^bb4: // 2 preds: ^bb2, ^bb3
+ %22 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+ %23 = llvm.add %22, %0 : i32
+ llvm.store %23, %4 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.br ^bb1
+^bb5: // pred: ^bb1
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @complex_cf
+// CHECK-NOT: = llvm.alloca
+llvm.func @complex_cf(%arg0: i32, ...) {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(false) : i1
+ %2 = llvm.mlir.constant(0 : i32) : i32
+ %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.cond_br %1, ^bb1, ^bb2
+^bb1: // pred: ^bb0
+ llvm.br ^bb2
+^bb2: // 2 preds: ^bb0, ^bb1
+ llvm.store %2, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.br ^bb3
+^bb3: // 2 preds: ^bb2, ^bb16
+ llvm.cond_br %1, ^bb4, ^bb17
+^bb4: // pred: ^bb3
+ llvm.cond_br %1, ^bb5, ^bb14
+^bb5: // pred: ^bb4
+ llvm.cond_br %1, ^bb7, ^bb6
+^bb6: // pred: ^bb5
+ llvm.br ^bb7
+^bb7: // 2 preds: ^bb5, ^bb6
+ llvm.cond_br %1, ^bb9, ^bb8
+^bb8: // pred: ^bb7
+ llvm.br ^bb9
+^bb9: // 2 preds: ^bb7, ^bb8
+ llvm.cond_br %1, ^bb11, ^bb10
+^bb10: // pred: ^bb9
+ llvm.br ^bb11
+^bb11: // 2 preds: ^bb9, ^bb10
+ llvm.cond_br %1, ^bb12, ^bb13
+^bb12: // pred: ^bb11
+ llvm.br ^bb13
+^bb13: // 2 preds: ^bb11, ^bb12
+ llvm.br ^bb14
+^bb14: // 2 preds: ^bb4, ^bb13
+ llvm.cond_br %1, ^bb15, ^bb16
+^bb15: // pred: ^bb14
+ llvm.br ^bb16
+^bb16: // 2 preds: ^bb14, ^bb15
+ llvm.br ^bb3
+^bb17: // pred: ^bb3
+ llvm.br ^bb20
+^bb18: // no predecessors
+ %4 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.br ^bb24
+^bb19: // no predecessors
+ llvm.br ^bb20
+^bb20: // 2 preds: ^bb17, ^bb19
+ llvm.cond_br %1, ^bb21, ^bb22
+^bb21: // pred: ^bb20
+ llvm.br ^bb23
+^bb22: // pred: ^bb20
+ llvm.br ^bb23
+^bb23: // 2 preds: ^bb21, ^bb22
+ llvm.br ^bb24
+^bb24: // 2 preds: ^bb18, ^bb23
+ llvm.br ^bb26
+^bb25: // no predecessors
+ llvm.br ^bb26
+^bb26: // 2 preds: ^bb24, ^bb25
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @llvm_crash
+llvm.func @llvm_crash() -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(0 : i32) : i32
+ %2 = llvm.mlir.addressof @j : !llvm.ptr
+ %3 = llvm.mlir.constant(0 : i8) : i8
+ // CHECK-NOT: = llvm.alloca
+ // CHECK: %[[VOLATILE_ALLOCA:.*]] = llvm.alloca
+ // CHECK-NOT: = llvm.alloca
+ %4 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %5 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %6 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ %7 = llvm.bitcast %1 : i32 to i32
+ // CHECK: llvm.store volatile %{{.*}}, %[[VOLATILE_ALLOCA]]
+ llvm.store volatile %1, %5 {alignment = 4 : i64} : i32, !llvm.ptr
+ %8 = llvm.call @_setjmp(%2) : (!llvm.ptr) -> i32
+ %9 = llvm.icmp "ne" %8, %1 : i32
+ %10 = llvm.zext %9 : i1 to i8
+ %11 = llvm.icmp "ne" %10, %3 : i8
+ llvm.cond_br %11, ^bb1, ^bb2
+^bb1: // pred: ^bb0
+ // CHECK: = llvm.load volatile %[[VOLATILE_ALLOCA]]
+ %12 = llvm.load volatile %5 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.store %12, %6 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.br ^bb3
+^bb2: // pred: ^bb0
+ // CHECK: llvm.store volatile %{{.*}}, %[[VOLATILE_ALLOCA]]
+ llvm.store volatile %0, %5 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.call @g() : () -> ()
+ llvm.store %1, %6 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.br ^bb3
+^bb3: // 2 preds: ^bb1, ^bb2
+ %13 = llvm.load %6 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.store %13, %4 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.br ^bb4
+^bb4: // pred: ^bb3
+ %14 = llvm.load %4 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.return %14 : i32
+}
+llvm.mlir.global external @j() {addr_space = 0 : i32} : !llvm.array<1 x struct<"struct.__jmp_buf_tag", (array<6 x i32>, i32, struct<"struct.__sigset_t", (array<32 x i32>)>)>>
+llvm.func @_setjmp(!llvm.ptr) -> i32 attributes {passthrough = ["returns_twice"]}
+llvm.func @g()
+
+// -----
+
+// CHECK-LABEL: llvm.func amdgpu_kernelcc @addrspace_discard
+// CHECK-NOT: = llvm.alloca
+llvm.func amdgpu_kernelcc @addrspace_discard() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(2 : i64) : i64
+ %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
+ %3 = llvm.addrspacecast %2 : !llvm.ptr<5> to !llvm.ptr
+ llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @ignore_atomic
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @ignore_atomic(%arg0: i32) -> i32 {
+ // CHECK-NOT: = llvm.alloca
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.store %arg0, %1 atomic seq_cst {alignment = 4 : i64} : i32, !llvm.ptr
+ %2 = llvm.load %1 atomic seq_cst {alignment = 4 : i64} : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %2 : i32
+}
+
+// -----
+
+// CHECK: llvm.func @landing_pad
+// CHECK-NOT: = llvm.alloca
+llvm.func @landing_pad() -> i32 attributes {personality = @__gxx_personality_v0} {
+ // CHECK-NOT: = llvm.alloca
+ // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+ // CHECK-NOT: = llvm.alloca
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ // CHECK: %[[V2:.*]] = llvm.invoke
+ %2 = llvm.invoke @landing_padf() to ^bb1 unwind ^bb3 : () -> i32
+// CHECK: ^{{.*}}:
+^bb1:// pred: ^bb0
+ llvm.store %2, %1 {alignment = 4 : i64} : i32, !llvm.ptr
+ // CHECK: llvm.br ^[[BB2:.*]](%[[V2]] : i32)
+ llvm.br ^bb2
+// CHECK: ^[[BB2]]([[V3:.*]]: i32):
+^bb2:// 2 preds: ^bb1, ^bb3
+ %3 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+ // CHECK: llvm.return [[V3]] : i32
+ llvm.return %3 : i32
+// CHECK: ^{{.*}}:
+^bb3:// pred: ^bb0
+ %4 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)>
+ // CHECK: llvm.br ^[[BB2:.*]](%[[UNDEF]] : i32)
+ llvm.br ^bb2
+}
+llvm.func @landing_padf() -> i32
+llvm.func @__gxx_personality_v0(...) -> i32
+
+// -----
+
+// CHECK-LABEL: llvm.func @unreachable_defines
+llvm.func @unreachable_defines() -> i32 {
+ // CHECK-NOT: = llvm.alloca
+ // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+ // CHECK-NOT: = llvm.alloca
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.br ^bb1
+^bb1: // 2 preds: ^bb0, ^bb2
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[UNDEF]] : i32
+ llvm.return %2 : i32
+^bb2: // no predecessors
+ %3 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.store %3, %1 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.br ^bb1
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @unreachable_jumps_to_merge_point
+// CHECK-NOT: = llvm.alloca
+llvm.func @unreachable_jumps_to_merge_point(%arg0: i1) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(6 : i32) : i32
+ %2 = llvm.mlir.constant(5 : i32) : i32
+ %3 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.cond_br %arg0, ^bb1, ^bb2
+^bb1: // 2 preds: ^bb0, ^bb4
+ llvm.store %1, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.br ^bb4
+^bb2: // pred: ^bb0
+ llvm.store %2, %3 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.br ^bb4
+^bb3: // no predecessors
+ llvm.br ^bb4
+^bb4: // 3 preds: ^bb1, ^bb2, ^bb3
+ %4 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+ llvm.return %4 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @ignore_lifetime
+// CHECK-NOT: = llvm.alloca
+llvm.func @ignore_lifetime() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.intr.lifetime.start 2, %1 : !llvm.ptr
+ llvm.store %0, %1 {alignment = 4 : i64} : i32, !llvm.ptr
+ llvm.intr.lifetime.end 2, %1 : !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @ignore_discardable_tree
+// CHECK-NOT: = llvm.alloca
+llvm.func @ignore_discardable_tree() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(0 : i16) : i16
+ %2 = llvm.mlir.constant(0 : i8) : i8
+ %3 = llvm.mlir.undef : !llvm.struct<(i8, i16)>
+ %4 = llvm.insertvalue %2, %3[0] : !llvm.struct<(i8, i16)>
+ %5 = llvm.insertvalue %1, %4[1] : !llvm.struct<(i8, i16)>
+ %6 = llvm.alloca %0 x !llvm.struct<(i8, i16)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %7 = llvm.getelementptr %6[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i8, i16)>
+ llvm.intr.lifetime.start 2, %7 : !llvm.ptr
+ llvm.store %5, %6 {alignment = 2 : i64} : !llvm.struct<(i8, i16)>, !llvm.ptr
+ llvm.intr.lifetime.end 2, %7 : !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @store_load_forward
+llvm.func @store_load_forward() -> i32 {
+ // CHECK-NOT: = llvm.alloca
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: %[[RES:.*]] = llvm.mlir.constant(0 : i32) : i32
+ %1 = llvm.mlir.constant(0 : i32) : i32
+ %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+ %3 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[RES]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @store_load_wrong_type
+llvm.func @store_load_wrong_type() -> i16 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(0 : i32) : i32
+ // CHECK: = llvm.alloca
+ %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ llvm.store %1, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+ %3 = llvm.load %2 {alignment = 2 : i64} : !llvm.ptr -> i16
+ llvm.return %3 : i16
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @merge_point_cycle
+llvm.func @merge_point_cycle() {
+ // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(7 : i32) : i32
+ %2 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ // CHECK: llvm.br ^[[BB1:.*]](%[[UNDEF]] : i32)
+ llvm.br ^bb1
+// CHECK: ^[[BB1]](%[[BARG:.*]]: i32):
+^bb1: // 2 preds: ^bb0, ^bb1
+ %3 = llvm.load %2 {alignment = 4 : i64} : !llvm.ptr -> i32
+ // CHECK: = llvm.call @use(%[[BARG]])
+ %4 = llvm.call @use(%3) : (i32) -> i1
+ // CHECK: %[[DEF:.*]] = llvm.call @def
+ %5 = llvm.call @def(%1) : (i32) -> i32
+ llvm.store %5, %2 {alignment = 4 : i64} : i32, !llvm.ptr
+ // CHECK: llvm.cond_br %{{.*}}, ^[[BB1]](%[[DEF]] : i32), ^{{.*}}
+ llvm.cond_br %4, ^bb1, ^bb2
+^bb2: // pred: ^bb1
+ llvm.return
+}
+
+llvm.func @def(i32) -> i32
+llvm.func @use(i32) -> i1
+
+// -----
+
+// CHECK-LABEL: llvm.func @no_unnecessary_arguments
+llvm.func @no_unnecessary_arguments() {
+ // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i32
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr
+ // CHECK: llvm.br ^[[BB1:.*]]
+ llvm.br ^bb1
+// CHECK: ^[[BB1]]:
+^bb1: // 2 preds: ^bb0, ^bb1
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+ // CHECK: = llvm.call @use(%[[UNDEF]])
+ %3 = llvm.call @use(%2) : (i32) -> i1
+ // CHECK: llvm.cond_br %{{.*}}, ^[[BB1]], ^{{.*}}
+ llvm.cond_br %3, ^bb1, ^bb2
+^bb2: // pred: ^bb1
+ llvm.return
+}
+
+llvm.func @use(i32) -> i1
+
+// -----
+
+// CHECK-LABEL: llvm.func @discardable_use_tree
+// CHECK-NOT: = llvm.alloca
+llvm.func @discardable_use_tree() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(2 : i64) : i64
+ %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+ %4 = llvm.bitcast %3 : !llvm.ptr to !llvm.ptr
+ llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+ llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @non_discardable_use_tree
+llvm.func @non_discardable_use_tree() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(2 : i64) : i64
+ // CHECK: = llvm.alloca
+ %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+ %4 = llvm.bitcast %3 : !llvm.ptr to !llvm.ptr
+ llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+ llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+ llvm.call @use(%4) : (!llvm.ptr) -> i1
+ llvm.return
+}
+llvm.func @use(!llvm.ptr) -> i1
+
+// -----
+
+// CHECK-LABEL: llvm.func @trivial_get_element_ptr
+// CHECK-NOT: = llvm.alloca
+llvm.func @trivial_get_element_ptr() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(2 : i64) : i64
+ %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+ %4 = llvm.getelementptr %3[0, 0, 0] : (!llvm.ptr) -> !llvm.ptr, i8
+ llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+ llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @nontrivial_get_element_ptr
+llvm.func @nontrivial_get_element_ptr() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(2 : i64) : i64
+ // CHECK: = llvm.alloca
+ %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+ %4 = llvm.getelementptr %3[0, 1, 0] : (!llvm.ptr) -> !llvm.ptr, i8
+ llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+ llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @dynamic_get_element_ptr
+llvm.func @dynamic_get_element_ptr() {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(2 : i64) : i64
+ // CHECK: = llvm.alloca
+ %2 = llvm.alloca %0 x i8 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+ %4 = llvm.getelementptr %3[0, %0] : (!llvm.ptr, i32) -> !llvm.ptr, i8
+ llvm.intr.lifetime.start 2, %3 : !llvm.ptr
+ llvm.intr.lifetime.start 2, %4 : !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @live_cycle
+// CHECK-SAME: (%[[ARG0:.*]]: i64, %{{.*}}: i1, %[[ARG2:.*]]: i64) -> i64
+llvm.func @live_cycle(%arg0: i64, %arg1: i1, %arg2: i64) -> i64 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: = llvm.alloca
+ %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ llvm.store %arg2, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+ // CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]](%[[ARG2]] : i64), ^[[BB2:.*]](%[[ARG2]] : i64)
+ llvm.cond_br %arg1, ^bb1, ^bb2
+// CHECK: ^[[BB1]](%[[V1:.*]]: i64):
+^bb1:
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+ // CHECK: llvm.call @use(%[[V1]])
+ llvm.call @use(%2) : (i64) -> ()
+ llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+ // CHECK: llvm.br ^[[BB2]](%[[ARG0]] : i64)
+ llvm.br ^bb2
+// CHECK: ^[[BB2]](%[[V2:.*]]: i64):
+^bb2:
+ // CHECK: llvm.br ^[[BB1]](%[[V2]] : i64)
+ llvm.br ^bb1
+}
+
+llvm.func @use(i64)
+
+// -----
+
+// The alloca here is used inside a subregion and thus cannot be promoted
+// yet; this test should change once promotion within subregions is
+// supported.
+// CHECK-LABEL: llvm.func @subregion_block_promotion
+// CHECK-SAME: (%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64) -> i64
+llvm.func @subregion_block_promotion(%arg0: i64, %arg1: i64) -> i64 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: %[[ALLOCA:.*]] = llvm.alloca
+ %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ // CHECK: llvm.store %[[ARG1]], %[[ALLOCA]]
+ llvm.store %arg1, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+ // CHECK: scf.execute_region {
+ scf.execute_region {
+ // CHECK: llvm.store %[[ARG0]], %[[ALLOCA]]
+ llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+ scf.yield
+ }
+ // CHECK: }
+ // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]]
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+ // CHECK: llvm.return %[[RES]] : i64
+ llvm.return %2 : i64
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @subregion_simple_transitive_promotion
+// CHECK-SAME: (%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64) -> i64
+llvm.func @subregion_simple_transitive_promotion(%arg0: i64, %arg1: i64) -> i64 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: = llvm.alloca
+ %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ llvm.store %arg1, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+ // CHECK: scf.execute_region {
+ scf.execute_region {
+ // CHECK: llvm.call @use(%[[ARG1]])
+ llvm.call @use(%2) : (i64) -> ()
+ scf.yield
+ }
+ // CHECK: }
+ // CHECK: llvm.return %[[ARG1]] : i64
+ llvm.return %2 : i64
+}
+
+llvm.func @use(i64)
+
+// -----
+
+// This behavior is specific to the LLVM dialect: under LLVM semantics, each
+// time an alloca is reached a new stack allocation is performed, so promoting
+// an alloca that can be reached multiple times could change observable
+// behavior. Thus only allocas in the entry block are promoted, as the
+// following test checks.
+
+// CHECK-LABEL: llvm.func @no_inner_alloca_promotion
+// CHECK-SAME: (%[[ARG:.*]]: i64) -> i64
+llvm.func @no_inner_alloca_promotion(%arg: i64) -> i64 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ llvm.br ^bb1
+^bb1:
+ // CHECK: %[[ALLOCA:.*]] = llvm.alloca
+ %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ // CHECK: llvm.store %[[ARG]], %[[ALLOCA]]
+ llvm.store %arg, %1 {alignment = 4 : i64} : i64, !llvm.ptr
+ // CHECK: %[[RES:.*]] = llvm.load %[[ALLOCA]]
+ %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64
+ // CHECK: llvm.return %[[RES]] : i64
+ llvm.return %2 : i64
+}
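
For completeness, the pipeline exercised by the RUN lines in these tests can
also be scheduled from C++. This is a sketch under the assumption that the
tablegen-generated createMem2Reg() factory is declared in
mlir/Transforms/Passes.h; the helper name runMem2Reg is hypothetical:

    #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
    #include "mlir/IR/BuiltinOps.h"
    #include "mlir/Pass/PassManager.h"
    #include "mlir/Transforms/Passes.h"

    // Hypothetical driver mirroring:
    //   mlir-opt --pass-pipeline="builtin.module(llvm.func(mem2reg))"
    mlir::LogicalResult runMem2Reg(mlir::ModuleOp module) {
      mlir::PassManager pm(module.getContext());
      pm.addNestedPass<mlir::LLVM::LLVMFuncOp>(mlir::createMem2Reg());
      return pm.run(module);
    }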