[Mlir-commits] [mlir] [mlir][mem2reg] Promote memory slots through transparent view operations (PR #196924)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue May 26 06:23:18 PDT 2026
https://github.com/jeanPerier updated https://github.com/llvm/llvm-project/pull/196924
>From 18f894775040e4eb61bdcc1a413578e1f080122c Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Mon, 11 May 2026 03:00:52 -0700
Subject: [PATCH 01/12] [mlir][mem2reg] Promote memory slots through
transparent view operations
---
.../mlir/Interfaces/MemorySlotInterfaces.h | 43 +++++++
.../mlir/Interfaces/MemorySlotInterfaces.td | 38 ++++++
mlir/lib/Interfaces/MemorySlotInterfaces.cpp | 109 ++++++++++++++++++
mlir/lib/Transforms/Mem2Reg.cpp | 63 +++++++---
mlir/test/Transforms/mem2reg.mlir | 91 +++++++++++++++
mlir/test/lib/Dialect/Test/TestOpDefs.cpp | 56 +++++++++
mlir/test/lib/Dialect/Test/TestOps.td | 26 +++++
7 files changed, 413 insertions(+), 13 deletions(-)
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
index 7bebfc9a30064..2163593ef823e 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
@@ -30,6 +30,15 @@ struct DestructurableMemorySlot : public MemorySlot {
DenseMap<Attribute, Type> subelementTypes;
};
+/// Description of a memory slot view produced by a `PromotableOpInterface`
+/// operation: `slotPointerOperand` is the operand viewed by the op,
+/// `view.ptr` is the result aliasing it, and `view.elemType` is the type
+/// at which `view.ptr` aliases the underlying slot.
+struct PromotableSlotView {
+ Value slotPointerOperand;
+ MemorySlot view;
+};
+
/// Returned by operation promotion logic requesting the deletion of an
/// operation.
enum class DeletionKind {
@@ -44,4 +53,38 @@ enum class DeletionKind {
#include "mlir/Interfaces/MemorySlotOpInterfaces.h.inc"
#include "mlir/Interfaces/MemorySlotTypeInterfaces.h.inc"
+namespace mlir {
+
+/// Returns true if `value` is `rootSlot.ptr` or a transitive view of it,
+/// following `PromotableOpInterface::getPromotableSlotView` chains. The
+/// element type at which `value` aliases the slot is written to
+/// `*outViewElemType` (equal to `rootSlot.elemType` when the chain is empty).
+bool isPromotableSlotView(Value value, const MemorySlot &rootSlot,
+ Type *outViewElemType = nullptr);
+
+/// Returns a MemorySlot whose `ptr` is the operand of `op` that is a
+/// (possibly transitive) view of `rootSlot.ptr`, with `elemType` equal to
+/// the type at which that operand aliases the slot. Mem2Reg uses this to
+/// hand each `PromotableMemOpInterface` op a slot description tailored to
+/// its memref operand. Returns `nullopt` if no operand is a view of
+/// `rootSlot`.
+std::optional<MemorySlot> getOpViewSlot(Operation *op,
+ const MemorySlot &rootSlot);
+
+/// Converts `slotValue` (typed at `rootSlot.elemType`) to the type at which
+/// `viewPtr` aliases `rootSlot`, by chaining
+/// `PromotableOpInterface::convertSlotValue` calls along the view chain
+/// root-to-leaf. Returns `nullptr` if any step's converter fails.
+Value convertSlotValueToViewValue(Value slotValue, Value viewPtr,
+ const MemorySlot &rootSlot,
+ OpBuilder &builder);
+
+/// Inverse of `convertSlotValueToViewValue`: converts `viewValue` back to
+/// `rootSlot.elemType` along the chain leaf-to-root.
+Value convertViewValueToSlotValue(Value viewValue, Value viewPtr,
+ const MemorySlot &rootSlot,
+ OpBuilder &builder);
+
+} // namespace mlir
+
#endif // MLIR_INTERFACES_MEMORYSLOTINTERFACES_H
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
index 801555fba4947..a8084ab7bf189 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
@@ -263,6 +263,44 @@ def PromotableOpInterface : OpInterface<"PromotableOpInterface"> {
(ins "::llvm::ArrayRef<std::pair<::mlir::Operation*, ::mlir::Value>>":$mutatedDefs,
"::mlir::OpBuilder &":$builder), [{}], [{ return; }]
>,
+ InterfaceMethod<[{
+ Describes this operation as a transparent view of a memory slot
+ reached through one of its operands.
+
+ The returned `view.ptr` must be a result of this operation;
+ `view.elemType` is the type at which `view.ptr` aliases the slot
+ pointed to by `slotPointerOperand`, possibly different from the
+ underlying slot's element type.
+
+ Returning a view here implies `convertSlotValue` can bridge
+ between `slotPointerOperand`'s element type and `view.elemType`
+ in both directions; if no such conversion exists, return
+ `std::nullopt`.
+
+ No IR mutation is allowed in this method.
+ }],
+ "::std::optional<::mlir::PromotableSlotView>",
+ "getPromotableSlotView",
+ (ins), [{}],
+ [{ return std::nullopt; }]
+ >,
+ InterfaceMethod<[{
+ Builds a value of `targetType` from `value`, bridging the
+ underlying slot's element type and the view's element type.
+ Mem2reg calls this in both directions (load: slot → view; store:
+ view → slot).
+ }],
+ "::mlir::Value",
+ "convertSlotValue",
+ (ins "::mlir::Value":$value,
+ "::mlir::Type":$targetType,
+ "::mlir::OpBuilder &":$builder), [{}],
+ [{
+ if (value.getType() == targetType)
+ return value;
+ return ::mlir::Value{};
+ }]
+ >,
];
}
diff --git a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
index 2c9e23250e9ee..1e1961eeca07c 100644
--- a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
+++ b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
@@ -8,5 +8,114 @@
#include "mlir/Interfaces/MemorySlotInterfaces.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+
#include "mlir/Interfaces/MemorySlotOpInterfaces.cpp.inc"
#include "mlir/Interfaces/MemorySlotTypeInterfaces.cpp.inc"
+
+using namespace mlir;
+
+namespace {
+/// One step in a view chain, leaf-first. `inputElemType` is the elemType
+/// of the slot one step closer to root; `outputElemType` is the elemType
+/// this step exposes.
+struct ViewStep {
+ PromotableOpInterface view;
+ Type inputElemType;
+ Type outputElemType;
+};
+} // namespace
+
+/// Walks back from `value` to `rootSlot.ptr` along
+/// `getPromotableSlotView` chains. On success, populates `chainOut` with
+/// the view ops leaf-to-root and writes the type at which `value` aliases
+/// the underlying slot to `*outViewElemType`.
+static bool walkPromotableSlotViewChain(Value value, const MemorySlot &rootSlot,
+ SmallVectorImpl<ViewStep> &chainOut,
+ Type *outViewElemType) {
+ if (value == rootSlot.ptr) {
+ if (outViewElemType)
+ *outViewElemType = rootSlot.elemType;
+ return true;
+ }
+
+ Value current = value;
+ Type aliasElemType{};
+ llvm::SmallPtrSet<Value, 4> seen;
+ while (current != rootSlot.ptr) {
+ if (!seen.insert(current).second)
+ return false;
+ auto promotable =
+ dyn_cast_or_null<PromotableOpInterface>(current.getDefiningOp());
+ if (!promotable)
+ return false;
+ std::optional<PromotableSlotView> info = promotable.getPromotableSlotView();
+ if (!info || info->view.ptr != current)
+ return false;
+ if (!aliasElemType)
+ aliasElemType = info->view.elemType;
+ chainOut.push_back(ViewStep{promotable, /*inputElemType=*/Type{},
+ /*outputElemType=*/info->view.elemType});
+ current = info->slotPointerOperand;
+ }
+
+ // Fill in each step's `inputElemType` from the previous step's output
+ // (or `rootSlot.elemType` for the root-most step).
+ Type prevOutput = rootSlot.elemType;
+ for (ViewStep &step : llvm::reverse(chainOut)) {
+ step.inputElemType = prevOutput;
+ prevOutput = step.outputElemType;
+ }
+
+ if (outViewElemType)
+ *outViewElemType = aliasElemType ? aliasElemType : rootSlot.elemType;
+ return true;
+}
+
+bool mlir::isPromotableSlotView(Value value, const MemorySlot &rootSlot,
+ Type *outViewElemType) {
+ SmallVector<ViewStep> chain;
+ return walkPromotableSlotViewChain(value, rootSlot, chain, outViewElemType);
+}
+
+std::optional<MemorySlot> mlir::getOpViewSlot(Operation *op,
+ const MemorySlot &rootSlot) {
+ for (Value operand : op->getOperands()) {
+ Type viewElemType;
+ if (isPromotableSlotView(operand, rootSlot, &viewElemType))
+ return MemorySlot{operand, viewElemType};
+ }
+ return std::nullopt;
+}
+
+Value mlir::convertSlotValueToViewValue(Value slotValue, Value viewPtr,
+ const MemorySlot &rootSlot,
+ OpBuilder &builder) {
+ SmallVector<ViewStep> chain;
+ if (!walkPromotableSlotViewChain(viewPtr, rootSlot, chain, /*out=*/nullptr))
+ return {};
+ Value current = slotValue;
+ // Root-to-leaf walk: reverse the leaf-first chain.
+ for (ViewStep &step : llvm::reverse(chain)) {
+ current = step.view.convertSlotValue(current, step.outputElemType, builder);
+ if (!current)
+ return {};
+ }
+ return current;
+}
+
+Value mlir::convertViewValueToSlotValue(Value viewValue, Value viewPtr,
+ const MemorySlot &rootSlot,
+ OpBuilder &builder) {
+ SmallVector<ViewStep> chain;
+ if (!walkPromotableSlotViewChain(viewPtr, rootSlot, chain, /*out=*/nullptr))
+ return {};
+ Value current = viewValue;
+ for (ViewStep &step : chain) {
+ current = step.view.convertSlotValue(current, step.inputElemType, builder);
+ if (!current)
+ return {};
+ }
+ return current;
+}
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
index 40d08d869a9e2..4f7039bede304 100644
--- a/mlir/lib/Transforms/Mem2Reg.cpp
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -400,14 +400,15 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
return failure();
regionsWithDirectUse.insert(user->getParentRegion());
} else if (auto promotable = dyn_cast<PromotableMemOpInterface>(user)) {
- if (!promotable.canUsesBeRemoved(slot, blockingUses, newBlockingUses,
+ MemorySlot viewSlot = getOpViewSlot(user, slot).value_or(slot);
+ if (!promotable.canUsesBeRemoved(viewSlot, blockingUses, newBlockingUses,
dataLayout))
return failure();
// Operations that interact with the slot's memory will be promoted using
// a reaching definition. Therefore, the operation must be within a region
// where the reaching definition can be computed.
- if (promotable.storesTo(slot))
+ if (promotable.storesTo(viewSlot))
regionsWithDirectStore.insert(user->getParentRegion());
else
regionsWithDirectUse.insert(user->getParentRegion());
@@ -515,11 +516,17 @@ MemorySlotPromotionAnalyzer::computeInfo() {
// Compute the blocks containing a store for each region, either directly or
// inherited from a nested region. As a side effect, `definingBlocks` contains
// all regions with at least one store.
+ //
+ // Iterating `info.userToBlockingUses` lets this also pick up stores that
+ // reach the slot through chains of views (`getPromotableSlotView`).
DenseMap<Region *, SmallPtrSet<Block *, 16>> definingBlocks;
- for (Operation *user : slot.ptr.getUsers())
- if (auto storeOp = dyn_cast<PromotableMemOpInterface>(user))
- if (storeOp.storesTo(slot))
- definingBlocks[user->getParentRegion()].insert(user->getBlock());
+ for (auto &[region, opsMap] : info.userToBlockingUses)
+ for (auto &[user, _blockingUses] : opsMap)
+ if (auto storeOp = dyn_cast<PromotableMemOpInterface>(user)) {
+ MemorySlot viewSlot = getOpViewSlot(user, slot).value_or(slot);
+ if (storeOp.storesTo(viewSlot))
+ definingBlocks[region].insert(user->getBlock());
+ }
for (auto &[region, regionInfo] : info.regionsToPromote)
if (regionInfo.hasValueStores)
definingBlocks[region->getParentRegion()].insert(
@@ -550,18 +557,37 @@ Value MemorySlotPromoter::promoteInBlock(Block *block, Value reachingDef) {
if (info.userToBlockingUses[memOp->getParentRegion()].contains(memOp))
reachingDefs.insert({memOp, reachingDef});
- if (memOp.storesTo(slot)) {
+ MemorySlot viewSlot = getOpViewSlot(memOp, slot).value_or(slot);
+ if (memOp.storesTo(viewSlot)) {
builder.setInsertionPointAfter(memOp);
// To not expose default value creation to the interfaces, if we have
// no reaching definition by now, we set it to the default value.
// This is slightly too eager as `getStored` may not need it.
if (!reachingDef)
reachingDef = getOrCreateDefaultValue();
- Value stored = memOp.getStored(slot, builder, reachingDef, dataLayout);
+ Value reachingDefAtStore = reachingDef;
+ if (slot.ptr != viewSlot.ptr) {
+ // The store sees the slot at `viewSlot.elemType`; convert the
+ // reaching definition (at root elem type) before handing it to
+ // `getStored`.
+ reachingDefAtStore = convertSlotValueToViewValue(
+ reachingDef, viewSlot.ptr, slot, builder);
+ assert(reachingDefAtStore && "convertSlotValue contract violation");
+ }
+ Value stored =
+ memOp.getStored(viewSlot, builder, reachingDefAtStore, dataLayout);
assert(stored && "a memory operation storing to a slot must provide a "
"new definition of the slot");
- reachingDef = stored;
+ // `replacedValuesMap` keeps `stored` at `viewSlot.elemType` for
+ // `visitReplacedValues`; the new reaching definition is tracked at
+ // the root slot's elem type, so convert `stored` back.
replacedValuesMap[memOp] = stored;
+ if (viewSlot.ptr != slot.ptr) {
+ stored =
+ convertViewValueToSlotValue(stored, viewSlot.ptr, slot, builder);
+ assert(stored && "convertSlotValue contract violation");
+ }
+ reachingDef = stored;
}
}
@@ -763,11 +789,22 @@ void MemorySlotPromoter::removeBlockingUses(Region *region) {
reachingDef = getOrCreateDefaultValue();
builder.setInsertionPointAfter(toPromote);
- if (toPromoteMemOp.removeBlockingUses(slot, blockingUsesMap[toPromote],
- builder, reachingDef,
- dataLayout) == DeletionKind::Delete)
+ MemorySlot viewSlot = getOpViewSlot(toPromote, slot).value_or(slot);
+ Value reachingDefAtBlockingUse = reachingDef;
+ if (viewSlot.ptr != slot.ptr) {
+ // Convert the reaching definition to `viewSlot.elemType` to match
+ // what the impl sees. Skipped when the chain is empty; any cast
+ // unused by the impl will be cleaned up by DCE.
+ reachingDefAtBlockingUse = convertSlotValueToViewValue(
+ reachingDef, viewSlot.ptr, slot, builder);
+ assert(reachingDefAtBlockingUse &&
+ "convertSlotValue contract violation");
+ }
+ if (toPromoteMemOp.removeBlockingUses(
+ viewSlot, blockingUsesMap[toPromote], builder,
+ reachingDefAtBlockingUse, dataLayout) == DeletionKind::Delete)
toErase.insert(toPromote);
- if (toPromoteMemOp.storesTo(slot))
+ if (toPromoteMemOp.storesTo(viewSlot))
if (Value replacedValue = replacedValuesMap[toPromoteMemOp])
replacedValues.push_back({toPromoteMemOp, replacedValue});
continue;
diff --git a/mlir/test/Transforms/mem2reg.mlir b/mlir/test/Transforms/mem2reg.mlir
index 94b721cf28dcf..edc0a37807b7b 100644
--- a/mlir/test/Transforms/mem2reg.mlir
+++ b/mlir/test/Transforms/mem2reg.mlir
@@ -181,3 +181,94 @@ func.func @poison_insertion_point(%val: f64) {
^bb3:
return
}
+
+// -----
+
+// Verifies that mem2reg promotes a memory slot whose stores and loads are
+// reached through a transparent view operation that exposes itself via
+// PromotableOpInterface::getPromotableSlotView. The conditional store on
+// the view in ^bb1 must be discovered as a defining block, otherwise the
+// merge point at ^bb2 would not get a block argument and the promotion
+// would silently drop the conditional update.
+
+// CHECK-LABEL: func.func @promotable_through_view
+// CHECK-SAME: (%[[A:.*]]: i32, %[[COND:.*]]: i1) -> i32
+// CHECK-NOT: test.multi_slot_alloca
+// CHECK-NOT: test.transparent_view
+// CHECK: %[[C42:.*]] = arith.constant 42 : i32
+// CHECK: cf.cond_br %[[COND]], ^[[BB1:.*]], ^[[BB2:.*]](%[[C42]] : i32)
+// CHECK: ^[[BB1]]:
+// CHECK: cf.br ^[[BB2]](%[[A]] : i32)
+// CHECK: ^[[BB2]](%[[MERGE:.*]]: i32):
+// CHECK: return %[[MERGE]] : i32
+func.func @promotable_through_view(%a: i32, %cond: i1) -> i32 {
+ %c42 = arith.constant 42 : i32
+ %slot = test.multi_slot_alloca : () -> memref<i32>
+ %view = test.transparent_view %slot : (memref<i32>) -> memref<i32>
+ memref.store %c42, %view[] : memref<i32>
+ cf.cond_br %cond, ^bb1, ^bb2
+^bb1:
+ memref.store %a, %view[] : memref<i32>
+ cf.br ^bb2
+^bb2:
+ %v = memref.load %view[] : memref<i32>
+ return %v : i32
+}
+
+// -----
+
+// Type-changing transparent view: the store and load see the slot at f32
+// while the underlying allocation is at i32. mem2reg materialises an
+// `unrealized_conversion_cast` (the view op's `convertSlotValue`) at the
+// store (f32 → i32 to update the reaching def at the slot's elem type) and
+// at the load (i32 → f32 to feed the load's f32 result type).
+
+// CHECK-LABEL: func.func @promotable_through_cast_view
+// CHECK-SAME: (%[[A:.*]]: f32) -> f32
+// CHECK-NOT: test.multi_slot_alloca
+// CHECK-NOT: test.transparent_cast_view
+// CHECK: %[[I32:.*]] = builtin.unrealized_conversion_cast %[[A]] : f32 to i32
+// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[I32]] : i32 to f32
+// CHECK: return %{{.*}} : f32
+func.func @promotable_through_cast_view(%a: f32) -> f32 {
+ %slot = test.multi_slot_alloca : () -> memref<i32>
+ %view = test.transparent_cast_view %slot : (memref<i32>) -> memref<f32>
+ memref.store %a, %view[] : memref<f32>
+ %v = memref.load %view[] : memref<f32>
+ return %v : f32
+}
+
+// -----
+
+// Same as above with a conditional store across blocks. The merge-point
+// block argument is at the root slot's element type (i32), and the
+// `convertSlotValue` casts are inserted at the store sites (f32 → i32) so
+// the merge argument can carry the conditional update; the load site
+// inserts the inverse cast (i32 → f32) for its result.
+
+// CHECK-LABEL: func.func @promotable_through_cast_view_blocks
+// CHECK-SAME: (%[[A:.*]]: f32, %[[COND:.*]]: i1) -> f32
+// CHECK-NOT: test.multi_slot_alloca
+// CHECK-NOT: test.transparent_cast_view
+// CHECK: %[[CST:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK: %[[CST_I32:.*]] = builtin.unrealized_conversion_cast %[[CST]] : f32 to i32
+// CHECK: cf.cond_br %[[COND]], ^[[BB1:.*]], ^[[BB2:.*]](%[[CST_I32]] : i32)
+// CHECK: ^[[BB1]]:
+// CHECK: %[[A_I32:.*]] = builtin.unrealized_conversion_cast %[[A]] : f32 to i32
+// CHECK: cf.br ^[[BB2]](%[[A_I32]] : i32)
+// CHECK: ^[[BB2]](%[[MERGE:.*]]: i32):
+// CHECK: %[[MERGE_F32:.*]] = builtin.unrealized_conversion_cast %[[MERGE]] : i32 to f32
+// CHECK: return %[[MERGE_F32]] : f32
+func.func @promotable_through_cast_view_blocks(%a: f32, %cond: i1) -> f32 {
+ %cst = arith.constant 1.0 : f32
+ %slot = test.multi_slot_alloca : () -> memref<i32>
+ %view = test.transparent_cast_view %slot : (memref<i32>) -> memref<f32>
+ memref.store %cst, %view[] : memref<f32>
+ cf.cond_br %cond, ^bb1, ^bb2
+^bb1:
+ memref.store %a, %view[] : memref<f32>
+ cf.br ^bb2
+^bb2:
+ %v = memref.load %view[] : memref<f32>
+ return %v : f32
+}
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index a3ff397ac26db..ca7677a9663e7 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1769,6 +1769,62 @@ TestMultiSlotAlloca::handleDestructuringComplete(
return createNewMultiAllocaWithoutSlot(slot, builder, *this);
}
+//===----------------------------------------------------------------------===//
+// TestTransparentView
+//===----------------------------------------------------------------------===//
+
+std::optional<PromotableSlotView> TestTransparentView::getPromotableSlotView() {
+ Type elemType = cast<MemRefType>(getResult().getType()).getElementType();
+ return PromotableSlotView{getSource(), MemorySlot{getResult(), elemType}};
+}
+
+bool TestTransparentView::canUsesBeRemoved(
+ const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses,
+ const DataLayout &dataLayout) {
+ for (OpOperand &use : getResult().getUses())
+ newBlockingUses.push_back(&use);
+ return true;
+}
+
+DeletionKind TestTransparentView::removeBlockingUses(
+ const SmallPtrSetImpl<OpOperand *> &blockingUses, OpBuilder &builder) {
+ return DeletionKind::Delete;
+}
+
+//===----------------------------------------------------------------------===//
+// TestTransparentCastView
+//===----------------------------------------------------------------------===//
+
+std::optional<PromotableSlotView>
+TestTransparentCastView::getPromotableSlotView() {
+ Type elemType = cast<MemRefType>(getResult().getType()).getElementType();
+ return PromotableSlotView{getSource(), MemorySlot{getResult(), elemType}};
+}
+
+bool TestTransparentCastView::canUsesBeRemoved(
+ const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses,
+ const DataLayout &dataLayout) {
+ for (OpOperand &use : getResult().getUses())
+ newBlockingUses.push_back(&use);
+ return true;
+}
+
+DeletionKind TestTransparentCastView::removeBlockingUses(
+ const SmallPtrSetImpl<OpOperand *> &blockingUses, OpBuilder &builder) {
+ return DeletionKind::Delete;
+}
+
+Value TestTransparentCastView::convertSlotValue(Value value, Type targetType,
+ OpBuilder &builder) {
+ if (value.getType() == targetType)
+ return value;
+ return UnrealizedConversionCastOp::create(builder, getLoc(), targetType,
+ value)
+ .getResult(0);
+}
+
namespace {
/// Returns test dialect's memref layout for test dialect's tensor encoding when
/// applicable.
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 56db6837b870c..fad6b8ef6a60c 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -3941,6 +3941,32 @@ def TestMultiSlotAlloca : TEST_Op<"multi_slot_alloca",
let assemblyFormat = "attr-dict `:` functional-type(operands, results)";
}
+// Same-element-type transparent view of a memref slot. Exercises the
+// view-chain handling in mem2reg with an identity convertSlotValue.
+def TestTransparentView : TEST_Op<"transparent_view",
+ [DeclareOpInterfaceMethods<PromotableOpInterface,
+ ["canUsesBeRemoved",
+ "removeBlockingUses",
+ "getPromotableSlotView"]>]> {
+ let arguments = (ins MemRefOf<[I32]>:$source);
+ let results = (outs MemRefOf<[I32]>:$result);
+ let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
+}
+
+// Type-changing transparent view of a memref slot. The result aliases the
+// source at a different element type; convertSlotValue bridges between
+// the two element types using `builtin.unrealized_conversion_cast`.
+def TestTransparentCastView : TEST_Op<"transparent_cast_view",
+ [DeclareOpInterfaceMethods<PromotableOpInterface,
+ ["canUsesBeRemoved",
+ "removeBlockingUses",
+ "getPromotableSlotView",
+ "convertSlotValue"]>]> {
+ let arguments = (ins MemRefOf<[I32, F32]>:$source);
+ let results = (outs MemRefOf<[I32, F32]>:$result);
+ let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
+}
+
//===----------------------------------------------------------------------===//
// Test allocation Ops
//===----------------------------------------------------------------------===//
>From 8393227f28b7b4e02ea8c26762ac7affed5e9a91 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Tue, 12 May 2026 03:23:51 -0700
Subject: [PATCH 02/12] fix handling of region
---
mlir/lib/Transforms/Mem2Reg.cpp | 6 +++---
mlir/test/Transforms/mem2reg.mlir | 23 +++++++++++++++++++++++
2 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
index 4f7039bede304..f8fce7fbf8431 100644
--- a/mlir/lib/Transforms/Mem2Reg.cpp
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -422,8 +422,9 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
for (OpOperand *blockingUse : newBlockingUses) {
assert(llvm::is_contained(user->getResults(), blockingUse->get()));
+ Operation *useOwner = blockingUse->getOwner();
SmallPtrSetImpl<OpOperand *> &newUserBlockingUseSet =
- blockingUsesMap[blockingUse->getOwner()];
+ userToBlockingUses[useOwner->getParentRegion()][useOwner];
newUserBlockingUseSet.insert(blockingUse);
}
}
@@ -793,8 +794,7 @@ void MemorySlotPromoter::removeBlockingUses(Region *region) {
Value reachingDefAtBlockingUse = reachingDef;
if (viewSlot.ptr != slot.ptr) {
// Convert the reaching definition to `viewSlot.elemType` to match
- // what the impl sees. Skipped when the chain is empty; any cast
- // unused by the impl will be cleaned up by DCE.
+ // what `toPromoteMemOp` sees.
reachingDefAtBlockingUse = convertSlotValueToViewValue(
reachingDef, viewSlot.ptr, slot, builder);
assert(reachingDefAtBlockingUse &&
diff --git a/mlir/test/Transforms/mem2reg.mlir b/mlir/test/Transforms/mem2reg.mlir
index edc0a37807b7b..aa7fe2bcb558d 100644
--- a/mlir/test/Transforms/mem2reg.mlir
+++ b/mlir/test/Transforms/mem2reg.mlir
@@ -272,3 +272,26 @@ func.func @promotable_through_cast_view_blocks(%a: f32, %cond: i1) -> f32 {
%v = memref.load %view[] : memref<f32>
return %v : f32
}
+
+// -----
+
+// Regression test: the view is defined in the parent region but the store
+// owning the propagated blocking use lives in a nested region (`scf.if`).
+// The new blocking use must be registered under the owner's region, otherwise
+// `removeBlockingUses` trips the "all operations must still be in the same
+// region" invariant after `scf.if` rebuilds itself in `finalizePromotion`.
+
+// CHECK-LABEL: func.func @promotable_through_view_across_regions
+// CHECK-SAME: (%[[COND:.*]]: i1, %[[A:.*]]: i32)
+// CHECK-NOT: test.multi_slot_alloca
+// CHECK-NOT: test.transparent_view
+// CHECK-NOT: memref.store
+// CHECK: scf.if %[[COND]]
+func.func @promotable_through_view_across_regions(%cond: i1, %a: i32) {
+ %slot = test.multi_slot_alloca : () -> memref<i32>
+ %view = test.transparent_view %slot : (memref<i32>) -> memref<i32>
+ scf.if %cond {
+ memref.store %a, %view[] : memref<i32>
+ }
+ return
+}
>From 58abb9090ffcdce9e2aae10b120642b5f8f157e7 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Wed, 13 May 2026 03:32:13 -0700
Subject: [PATCH 03/12] Move APIs in PromotableAliaserInterface and split
bidirectional conversion API.
Move the new APIs into a new PromotableAliaserInterface for more clarity.
Split convertSlotValue into two directional APIs:
- projectSlotValueToViewValue to be called before promoting a load on a view.
- projectViewValueToSlotValue to be called after promoting a store on a view.
projectViewValueToSlotValue also takes the reaching def of the slot before the store
so that partial views can be promoted via insert/extract.
---
.../mlir/Interfaces/MemorySlotInterfaces.h | 29 +++++---
.../mlir/Interfaces/MemorySlotInterfaces.td | 70 ++++++++++++++++---
mlir/lib/Interfaces/MemorySlotInterfaces.cpp | 44 ++++++++----
mlir/lib/Transforms/Mem2Reg.cpp | 17 ++---
mlir/test/Transforms/mem2reg.mlir | 20 +++---
mlir/test/lib/Dialect/Test/TestOpDefs.cpp | 15 +++-
mlir/test/lib/Dialect/Test/TestOps.td | 19 ++---
7 files changed, 154 insertions(+), 60 deletions(-)
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
index 2163593ef823e..7f98925df6452 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
@@ -30,10 +30,11 @@ struct DestructurableMemorySlot : public MemorySlot {
DenseMap<Attribute, Type> subelementTypes;
};
-/// Description of a memory slot view produced by a `PromotableOpInterface`
-/// operation: `slotPointerOperand` is the operand viewed by the op,
-/// `view.ptr` is the result aliasing it, and `view.elemType` is the type
-/// at which `view.ptr` aliases the underlying slot.
+/// Description of a memory slot view produced by a
+/// `PromotableAliaserInterface` operation: `slotPointerOperand` is the slot
+/// pointer viewed by the op, `view.ptr` is the result aliasing it, and
+/// `view.elemType` is the type at which `view.ptr` aliases the underlying
+/// slot.
struct PromotableSlotView {
Value slotPointerOperand;
MemorySlot view;
@@ -56,7 +57,7 @@ enum class DeletionKind {
namespace mlir {
/// Returns true if `value` is `rootSlot.ptr` or a transitive view of it,
-/// following `PromotableOpInterface::getPromotableSlotView` chains. The
+/// following `PromotableAliaserInterface::getPromotableSlotView` chains. The
/// element type at which `value` aliases the slot is written to
/// `*outViewElemType` (equal to `rootSlot.elemType` when the chain is empty).
bool isPromotableSlotView(Value value, const MemorySlot &rootSlot,
@@ -66,22 +67,30 @@ bool isPromotableSlotView(Value value, const MemorySlot &rootSlot,
/// (possibly transitive) view of `rootSlot.ptr`, with `elemType` equal to
/// the type at which that operand aliases the slot. Mem2Reg uses this to
/// hand each `PromotableMemOpInterface` op a slot description tailored to
-/// its memref operand. Returns `nullopt` if no operand is a view of
+/// its slot pointer operand. Returns `nullopt` if no operand is a view of
/// `rootSlot`.
std::optional<MemorySlot> getOpViewSlot(Operation *op,
const MemorySlot &rootSlot);
/// Converts `slotValue` (typed at `rootSlot.elemType`) to the type at which
/// `viewPtr` aliases `rootSlot`, by chaining
-/// `PromotableOpInterface::convertSlotValue` calls along the view chain
-/// root-to-leaf. Returns `nullptr` if any step's converter fails.
+/// `PromotableAliaserInterface::projectSlotValueToViewValue` calls along
+/// the view chain root-to-leaf. Returns a null value if any step's projector
+/// fails.
Value convertSlotValueToViewValue(Value slotValue, Value viewPtr,
const MemorySlot &rootSlot,
OpBuilder &builder);
-/// Inverse of `convertSlotValueToViewValue`: converts `viewValue` back to
-/// `rootSlot.elemType` along the chain leaf-to-root.
+/// Inverse of `convertSlotValueToViewValue`: converts `viewValue` (typed at
+/// `viewPtr`'s view element type) back to `rootSlot.elemType` along the
+/// chain leaf-to-root, by chaining
+/// `PromotableAliaserInterface::projectViewValueToSlotValue` calls.
+/// `rootReachingDef` is the current slot value at `rootSlot.elemType`. It
+/// is projected down at each intermediate level to provide the
+/// reaching definition the per-step projector needs (e.g. for a partial
+/// subview that inserts `viewValue` into `reachingDef`).
Value convertViewValueToSlotValue(Value viewValue, Value viewPtr,
+ Value rootReachingDef,
const MemorySlot &rootSlot,
OpBuilder &builder);
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
index a8084ab7bf189..33097986da4d6 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
@@ -263,6 +263,33 @@ def PromotableOpInterface : OpInterface<"PromotableOpInterface"> {
(ins "::llvm::ArrayRef<std::pair<::mlir::Operation*, ::mlir::Value>>":$mutatedDefs,
"::mlir::OpBuilder &":$builder), [{}], [{ return; }]
>,
+ ];
+}
+
+def PromotableAliaserInterface : OpInterface<"PromotableAliaserInterface"> {
+ let description = [{
+ Describes an operation that produces a transparent alias of a memory slot
+ reached through one of its operands. Mem2Reg walks chains of such aliases
+ to project slot values across them, allowing the load/store operations on
+ the alias to be promoted as if they accessed the underlying slot directly.
+
+ An alias is still a blocking use of the underlying slot pointer, so the
+ operation must also implement `PromotableOpInterface` or
+ `PromotableMemOpInterface` so that mem2reg can remove the alias once the
+ slot has been promoted.
+ }];
+ let cppNamespace = "::mlir";
+
+ let verify = [{
+ if (!::mlir::isa<::mlir::PromotableOpInterface,
+ ::mlir::PromotableMemOpInterface>($_op))
+ return $_op->emitOpError(
+ "implements `PromotableAliaserInterface` but must also implement "
+ "`PromotableOpInterface` or `PromotableMemOpInterface`.");
+ return ::mlir::success();
+ }];
+
+ let methods = [
InterfaceMethod<[{
Describes this operation as a transparent view of a memory slot
reached through one of its operands.
@@ -272,26 +299,24 @@ def PromotableOpInterface : OpInterface<"PromotableOpInterface"> {
pointed to by `slotPointerOperand`, possibly different from the
underlying slot's element type.
- Returning a view here implies `convertSlotValue` can bridge
- between `slotPointerOperand`'s element type and `view.elemType`
- in both directions; if no such conversion exists, return
- `std::nullopt`.
+ Returning a view here implies the two projection methods below can
+ bridge between the slot pointer operand's element type and
+ `view.elemType`; if no such projection exists, return `std::nullopt`.
No IR mutation is allowed in this method.
}],
"::std::optional<::mlir::PromotableSlotView>",
"getPromotableSlotView",
- (ins), [{}],
- [{ return std::nullopt; }]
+ (ins)
>,
InterfaceMethod<[{
- Builds a value of `targetType` from `value`, bridging the
- underlying slot's element type and the view's element type.
- Mem2reg calls this in both directions (load: slot → view; store:
- view → slot).
+ Builds a value of `targetType` from `value`, projecting the slot
+ pointer operand's element type down to the view's element type.
+ Mem2Reg calls this when a load on the view needs the slot value
+ materialized at the view's element type.
}],
"::mlir::Value",
- "convertSlotValue",
+ "projectSlotValueToViewValue",
(ins "::mlir::Value":$value,
"::mlir::Type":$targetType,
"::mlir::OpBuilder &":$builder), [{}],
@@ -301,6 +326,29 @@ def PromotableOpInterface : OpInterface<"PromotableOpInterface"> {
return ::mlir::Value{};
}]
>,
+ InterfaceMethod<[{
+ Builds a value of `targetType` from `viewValue`, projecting the
+ view's element type back to the slot pointer operand's element type.
+ Mem2Reg calls this when a store on the view needs to update the
+ slot value at the slot pointer operand's element type.
+
+ `reachingDef` is the slot value at `targetType` immediately before
+ the store. For full views it can be ignored; for partial subviews
+ (e.g. one field of an aggregate) the result is built by inserting
+ `viewValue` into `reachingDef`.
+ }],
+ "::mlir::Value",
+ "projectViewValueToSlotValue",
+ (ins "::mlir::Value":$viewValue,
+ "::mlir::Type":$targetType,
+ "::mlir::Value":$reachingDef,
+ "::mlir::OpBuilder &":$builder), [{}],
+ [{
+ if (viewValue.getType() == targetType)
+ return viewValue;
+ return ::mlir::Value{};
+ }]
+ >,
];
}
diff --git a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
index 1e1961eeca07c..910bd2268ca78 100644
--- a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
+++ b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
@@ -17,11 +17,11 @@
using namespace mlir;
namespace {
-/// One step in a view chain, leaf-first. `inputElemType` is the elemType
-/// of the slot one step closer to root; `outputElemType` is the elemType
+/// One step in a view chain, leaf-first. `inputElemType` is the elem type
+/// of the slot one step closer to root; `outputElemType` is the elem type
/// this step exposes.
struct ViewStep {
- PromotableOpInterface view;
+ PromotableAliaserInterface view;
Type inputElemType;
Type outputElemType;
};
@@ -46,16 +46,16 @@ static bool walkPromotableSlotViewChain(Value value, const MemorySlot &rootSlot,
while (current != rootSlot.ptr) {
if (!seen.insert(current).second)
return false;
- auto promotable =
- dyn_cast_or_null<PromotableOpInterface>(current.getDefiningOp());
- if (!promotable)
+ auto aliaser =
+ dyn_cast_or_null<PromotableAliaserInterface>(current.getDefiningOp());
+ if (!aliaser)
return false;
- std::optional<PromotableSlotView> info = promotable.getPromotableSlotView();
+ std::optional<PromotableSlotView> info = aliaser.getPromotableSlotView();
if (!info || info->view.ptr != current)
return false;
if (!aliasElemType)
aliasElemType = info->view.elemType;
- chainOut.push_back(ViewStep{promotable, /*inputElemType=*/Type{},
+ chainOut.push_back(ViewStep{aliaser, /*inputElemType=*/Type{},
/*outputElemType=*/info->view.elemType});
current = info->slotPointerOperand;
}
@@ -98,7 +98,8 @@ Value mlir::convertSlotValueToViewValue(Value slotValue, Value viewPtr,
Value current = slotValue;
// Root-to-leaf walk: reverse the leaf-first chain.
for (ViewStep &step : llvm::reverse(chain)) {
- current = step.view.convertSlotValue(current, step.outputElemType, builder);
+ current = step.view.projectSlotValueToViewValue(
+ current, step.outputElemType, builder);
if (!current)
return {};
}
@@ -106,14 +107,33 @@ Value mlir::convertSlotValueToViewValue(Value slotValue, Value viewPtr,
}
Value mlir::convertViewValueToSlotValue(Value viewValue, Value viewPtr,
+ Value rootReachingDef,
const MemorySlot &rootSlot,
OpBuilder &builder) {
SmallVector<ViewStep> chain;
if (!walkPromotableSlotViewChain(viewPtr, rootSlot, chain, /*out=*/nullptr))
return {};
- Value current = viewValue;
- for (ViewStep &step : chain) {
- current = step.view.convertSlotValue(current, step.inputElemType, builder);
+
+ // Project `rootReachingDef` down to each step's input level so the
+ // per-step projector can use it (needed for partial subviews; full views
+ // ignore it). The chain is leaf-first, so `chain.back()` is root-most
+ // (its input is `rootSlot.elemType`) and `chain.front()` is leaf-most.
+ SmallVector<Value> perStepReachingDef(chain.size());
+ Value current = rootReachingDef;
+ for (int i = static_cast<int>(chain.size()) - 1; i >= 0; --i) {
+ perStepReachingDef[i] = current;
+ current = chain[i].view.projectSlotValueToViewValue(
+ current, chain[i].outputElemType, builder);
+ if (!current)
+ return {};
+ }
+
+ // Walk leaf-to-root, combining `viewValue` with the projected reaching
+ // definition at each step.
+ current = viewValue;
+ for (size_t i = 0; i < chain.size(); ++i) {
+ current = chain[i].view.projectViewValueToSlotValue(
+ current, chain[i].inputElemType, perStepReachingDef[i], builder);
if (!current)
return {};
}
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
index f8fce7fbf8431..5cd2c890b5aaf 100644
--- a/mlir/lib/Transforms/Mem2Reg.cpp
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -568,12 +568,13 @@ Value MemorySlotPromoter::promoteInBlock(Block *block, Value reachingDef) {
reachingDef = getOrCreateDefaultValue();
Value reachingDefAtStore = reachingDef;
if (slot.ptr != viewSlot.ptr) {
- // The store sees the slot at `viewSlot.elemType`; convert the
+ // The store sees the slot at `viewSlot.elemType`; project the
// reaching definition (at root elem type) before handing it to
// `getStored`.
reachingDefAtStore = convertSlotValueToViewValue(
reachingDef, viewSlot.ptr, slot, builder);
- assert(reachingDefAtStore && "convertSlotValue contract violation");
+ assert(reachingDefAtStore &&
+ "projectSlotValueToViewValue contract violation");
}
Value stored =
memOp.getStored(viewSlot, builder, reachingDefAtStore, dataLayout);
@@ -581,12 +582,12 @@ Value MemorySlotPromoter::promoteInBlock(Block *block, Value reachingDef) {
"new definition of the slot");
// `replacedValuesMap` keeps `stored` at `viewSlot.elemType` for
// `visitReplacedValues`; the new reaching definition is tracked at
- // the root slot's elem type, so convert `stored` back.
+ // the root slot's elem type, so project `stored` back.
replacedValuesMap[memOp] = stored;
if (viewSlot.ptr != slot.ptr) {
- stored =
- convertViewValueToSlotValue(stored, viewSlot.ptr, slot, builder);
- assert(stored && "convertSlotValue contract violation");
+ stored = convertViewValueToSlotValue(stored, viewSlot.ptr,
+ reachingDef, slot, builder);
+ assert(stored && "projectViewValueToSlotValue contract violation");
}
reachingDef = stored;
}
@@ -793,12 +794,12 @@ void MemorySlotPromoter::removeBlockingUses(Region *region) {
MemorySlot viewSlot = getOpViewSlot(toPromote, slot).value_or(slot);
Value reachingDefAtBlockingUse = reachingDef;
if (viewSlot.ptr != slot.ptr) {
- // Convert the reaching definition to `viewSlot.elemType` to match
+ // Project the reaching definition to `viewSlot.elemType` to match
// what `toPromoteMemOp` sees.
reachingDefAtBlockingUse = convertSlotValueToViewValue(
reachingDef, viewSlot.ptr, slot, builder);
assert(reachingDefAtBlockingUse &&
- "convertSlotValue contract violation");
+ "projectSlotValueToViewValue contract violation");
}
if (toPromoteMemOp.removeBlockingUses(
viewSlot, blockingUsesMap[toPromote], builder,
diff --git a/mlir/test/Transforms/mem2reg.mlir b/mlir/test/Transforms/mem2reg.mlir
index aa7fe2bcb558d..064669534daa5 100644
--- a/mlir/test/Transforms/mem2reg.mlir
+++ b/mlir/test/Transforms/mem2reg.mlir
@@ -186,9 +186,9 @@ func.func @poison_insertion_point(%val: f64) {
// Verifies that mem2reg promotes a memory slot whose stores and loads are
// reached through a transparent view operation that exposes itself via
-// PromotableOpInterface::getPromotableSlotView. The conditional store on
-// the view in ^bb1 must be discovered as a defining block, otherwise the
-// merge point at ^bb2 would not get a block argument and the promotion
+// PromotableAliaserInterface::getPromotableSlotView. The conditional store
+// on the view in ^bb1 must be discovered as a defining block, otherwise
+// the merge point at ^bb2 would not get a block argument and the promotion
// would silently drop the conditional update.
// CHECK-LABEL: func.func @promotable_through_view
@@ -219,9 +219,10 @@ func.func @promotable_through_view(%a: i32, %cond: i1) -> i32 {
// Type-changing transparent view: the store and load see the slot at f32
// while the underlying allocation is at i32. mem2reg materialises an
-// `unrealized_conversion_cast` (the view op's `convertSlotValue`) at the
-// store (f32 → i32 to update the reaching def at the slot's elem type) and
-// at the load (i32 → f32 to feed the load's f32 result type).
+// `unrealized_conversion_cast` (the view op's `projectViewValueToSlotValue`)
+// at the store (f32 → i32 to update the reaching def at the slot's elem
+// type) and at the load (i32 → f32 via `projectSlotValueToViewValue` to feed
+// the load's f32 result type).
// CHECK-LABEL: func.func @promotable_through_cast_view
// CHECK-SAME: (%[[A:.*]]: f32) -> f32
@@ -242,9 +243,10 @@ func.func @promotable_through_cast_view(%a: f32) -> f32 {
// Same as above with a conditional store across blocks. The merge-point
// block argument is at the root slot's element type (i32), and the
-// `convertSlotValue` casts are inserted at the store sites (f32 → i32) so
-// the merge argument can carry the conditional update; the load site
-// inserts the inverse cast (i32 → f32) for its result.
+// `projectViewValueToSlotValue` casts are inserted at the store sites
+// (f32 → i32) so the merge argument can carry the conditional update; the
+// load site inserts the inverse cast (i32 → f32) for its result via
+// `projectSlotValueToViewValue`.
// CHECK-LABEL: func.func @promotable_through_cast_view_blocks
// CHECK-SAME: (%[[A:.*]]: f32, %[[COND:.*]]: i1) -> f32
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index ca7677a9663e7..7605172b6c3bb 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1816,8 +1816,9 @@ DeletionKind TestTransparentCastView::removeBlockingUses(
return DeletionKind::Delete;
}
-Value TestTransparentCastView::convertSlotValue(Value value, Type targetType,
- OpBuilder &builder) {
+Value TestTransparentCastView::projectSlotValueToViewValue(Value value,
+ Type targetType,
+ OpBuilder &builder) {
if (value.getType() == targetType)
return value;
return UnrealizedConversionCastOp::create(builder, getLoc(), targetType,
@@ -1825,6 +1826,16 @@ Value TestTransparentCastView::convertSlotValue(Value value, Type targetType,
.getResult(0);
}
+Value TestTransparentCastView::projectViewValueToSlotValue(
+ Value viewValue, Type targetType, Value /*reachingDef*/,
+ OpBuilder &builder) {
+ if (viewValue.getType() == targetType)
+ return viewValue;
+ return UnrealizedConversionCastOp::create(builder, getLoc(), targetType,
+ viewValue)
+ .getResult(0);
+}
+
namespace {
/// Returns test dialect's memref layout for test dialect's tensor encoding when
/// applicable.
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index fad6b8ef6a60c..029d65f071e04 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -3942,26 +3942,29 @@ def TestMultiSlotAlloca : TEST_Op<"multi_slot_alloca",
}
// Same-element-type transparent view of a memref slot. Exercises the
-// view-chain handling in mem2reg with an identity convertSlotValue.
+// view-chain handling in mem2reg with identity projections.
def TestTransparentView : TEST_Op<"transparent_view",
[DeclareOpInterfaceMethods<PromotableOpInterface,
["canUsesBeRemoved",
- "removeBlockingUses",
- "getPromotableSlotView"]>]> {
+ "removeBlockingUses"]>,
+ DeclareOpInterfaceMethods<PromotableAliaserInterface,
+ ["getPromotableSlotView"]>]> {
let arguments = (ins MemRefOf<[I32]>:$source);
let results = (outs MemRefOf<[I32]>:$result);
let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
}
// Type-changing transparent view of a memref slot. The result aliases the
-// source at a different element type; convertSlotValue bridges between
-// the two element types using `builtin.unrealized_conversion_cast`.
+// source at a different element type; the projection methods bridge
+// between the two element types using `builtin.unrealized_conversion_cast`.
def TestTransparentCastView : TEST_Op<"transparent_cast_view",
[DeclareOpInterfaceMethods<PromotableOpInterface,
["canUsesBeRemoved",
- "removeBlockingUses",
- "getPromotableSlotView",
- "convertSlotValue"]>]> {
+ "removeBlockingUses"]>,
+ DeclareOpInterfaceMethods<PromotableAliaserInterface,
+ ["getPromotableSlotView",
+ "projectSlotValueToViewValue",
+ "projectViewValueToSlotValue"]>]> {
let arguments = (ins MemRefOf<[I32, F32]>:$source);
let results = (outs MemRefOf<[I32, F32]>:$result);
let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
>From 573ccb55a61f8cc89b2072a95d9d6b9044e68178 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Wed, 13 May 2026 04:03:40 -0700
Subject: [PATCH 04/12] rephrase descriptions
---
.../mlir/Interfaces/MemorySlotInterfaces.h | 50 ++++++--------
.../mlir/Interfaces/MemorySlotInterfaces.td | 69 ++++++++++---------
mlir/lib/Interfaces/MemorySlotInterfaces.cpp | 6 +-
3 files changed, 60 insertions(+), 65 deletions(-)
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
index 7f98925df6452..f549193a75a05 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
@@ -30,13 +30,13 @@ struct DestructurableMemorySlot : public MemorySlot {
DenseMap<Attribute, Type> subelementTypes;
};
-/// Description of a memory slot view produced by a
-/// `PromotableAliaserInterface` operation: `slotPointerOperand` is the slot
-/// pointer viewed by the op, `view.ptr` is the result aliasing it, and
-/// `view.elemType` is the type at which `view.ptr` aliases the underlying
-/// slot.
+/// Represent a memory slot view produced by a `PromotableAliaserInterface`
+/// operation.
struct PromotableSlotView {
- Value slotPointerOperand;
+ /// The slot pointer operand that is aliased by the view.
+ Value aliasedSlotPointerOperand;
+ /// The MemorySlot created by the operation that aliases the operand
+ /// MemorySlot.
MemorySlot view;
};
@@ -56,39 +56,31 @@ enum class DeletionKind {
namespace mlir {
-/// Returns true if `value` is `rootSlot.ptr` or a transitive view of it,
-/// following `PromotableAliaserInterface::getPromotableSlotView` chains. The
-/// element type at which `value` aliases the slot is written to
-/// `*outViewElemType` (equal to `rootSlot.elemType` when the chain is empty).
+/// Returns true if `value` is `rootSlot.ptr` or a transitive view of it.
+/// If so, writes the element type of the alias to `*outViewElemType` (which
+/// defaults to `rootSlot.elemType` for the root pointer).
bool isPromotableSlotView(Value value, const MemorySlot &rootSlot,
Type *outViewElemType = nullptr);
-/// Returns a MemorySlot whose `ptr` is the operand of `op` that is a
-/// (possibly transitive) view of `rootSlot.ptr`, with `elemType` equal to
-/// the type at which that operand aliases the slot. Mem2Reg uses this to
-/// hand each `PromotableMemOpInterface` op a slot description tailored to
-/// its slot pointer operand. Returns `nullopt` if no operand is a view of
-/// `rootSlot`.
+/// Returns a `MemorySlot` representing the operand of `op` that is a view of
+/// `rootSlot.ptr`, tailored with the view's element type. Returns `nullopt`
+/// if no operand is a view of `rootSlot`.
std::optional<MemorySlot> getOpViewSlot(Operation *op,
const MemorySlot &rootSlot);
-/// Converts `slotValue` (typed at `rootSlot.elemType`) to the type at which
-/// `viewPtr` aliases `rootSlot`, by chaining
-/// `PromotableAliaserInterface::projectSlotValueToViewValue` calls along
-/// the view chain root-to-leaf. Returns a null value if any step's projector
-/// fails.
+/// Projects `slotValue` (of `rootSlot.elemType`) down to the element type of
+/// `viewPtr` by chaining `projectSlotValueToViewValue` calls along the alias
+/// chain, from the original slot down to the view pointer. Returns a null
+/// value if any projection step fails.
Value convertSlotValueToViewValue(Value slotValue, Value viewPtr,
const MemorySlot &rootSlot,
OpBuilder &builder);
-/// Inverse of `convertSlotValueToViewValue`: converts `viewValue` (typed at
-/// `viewPtr`'s view element type) back to `rootSlot.elemType` along the
-/// chain leaf-to-root, by chaining
-/// `PromotableAliaserInterface::projectViewValueToSlotValue` calls.
-/// `rootReachingDef` is the current slot value at `rootSlot.elemType`. It
-/// is projected down at each intermediate level to provide the
-/// reaching definition the per-step projector needs (e.g. for a partial
-/// subview that inserts `viewValue` into `reachingDef`).
+/// Projects `viewValue` back up to `rootSlot.elemType` by chaining
+/// `projectViewValueToSlotValue` calls backwards along the alias chain, from
+/// the view pointer up to the original slot. `rootReachingDef` provides the
+/// current slot value, which is projected down at each step to supply the
+/// required reaching definition (e.g., for partial subviews).
Value convertViewValueToSlotValue(Value viewValue, Value viewPtr,
Value rootReachingDef,
const MemorySlot &rootSlot,
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
index 33097986da4d6..85bf800d91e5c 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
@@ -268,15 +268,16 @@ def PromotableOpInterface : OpInterface<"PromotableOpInterface"> {
def PromotableAliaserInterface : OpInterface<"PromotableAliaserInterface"> {
let description = [{
- Describes an operation that produces a transparent alias of a memory slot
- reached through one of its operands. Mem2Reg walks chains of such aliases
- to project slot values across them, allowing the load/store operations on
- the alias to be promoted as if they accessed the underlying slot directly.
-
- An alias is still a blocking use of the underlying slot pointer, so the
- operation must also implement `PromotableOpInterface` or
- `PromotableMemOpInterface` so that mem2reg can remove the alias once the
- slot has been promoted.
+ Describes an operation that creates a transparent alias of a memory slot
+ accessed through one of its operands. Mem2Reg traverses chains of these
+ aliases to project slot values across them. This allows load and store
+ operations on the alias to be promoted as if they were directly accessing
+ the underlying slot.
+
+ Since an alias remains a blocking use of the underlying slot pointer, the
+ operation must also implement either `PromotableOpInterface` or
+ `PromotableMemOpInterface`. This ensures that mem2reg can remove the alias
+ after the slot has been promoted.
}];
let cppNamespace = "::mlir";
@@ -291,17 +292,17 @@ def PromotableAliaserInterface : OpInterface<"PromotableAliaserInterface"> {
let methods = [
InterfaceMethod<[{
- Describes this operation as a transparent view of a memory slot
- reached through one of its operands.
-
- The returned `view.ptr` must be a result of this operation;
- `view.elemType` is the type at which `view.ptr` aliases the slot
- pointed to by `slotPointerOperand`, possibly different from the
- underlying slot's element type.
-
- Returning a view here implies the two projection methods below can
- bridge between the slot pointer operand's element type and
- `view.elemType`; if no such projection exists, return `std::nullopt`.
+ Returns a view of a parent memory slot (accessed via
+ `aliasedSlotPointerOperand`) as a new memory slot. The pointer to this
+ new slot must be a result of the current operation, and its element
+ type may differ from the parent's.
+
+ Providing a view requires implementing the two projection methods below
+ to bridge values between the parent's and the new slot's element types.
+ One method extracts the new slot's value from the parent's value, while
+ the other reconstructs the parent's value after a store to the new slot.
+ If these projections cannot be performed, this method should return
+ `std::nullopt`.
No IR mutation is allowed in this method.
}],
@@ -310,10 +311,11 @@ def PromotableAliaserInterface : OpInterface<"PromotableAliaserInterface"> {
(ins)
>,
InterfaceMethod<[{
- Builds a value of `targetType` from `value`, projecting the slot
- pointer operand's element type down to the view's element type.
- Mem2Reg calls this when a load on the view needs the slot value
- materialized at the view's element type.
+ Constructs a value of `targetType` from the given `value`, projecting
+ the element type of the slot pointer operand down to the element type
+ of the view. Mem2Reg invokes this method when a load operation on the
+ view requires the slot value to be materialized with the view's
+ element type.
}],
"::mlir::Value",
"projectSlotValueToViewValue",
@@ -327,15 +329,16 @@ def PromotableAliaserInterface : OpInterface<"PromotableAliaserInterface"> {
}]
>,
InterfaceMethod<[{
- Builds a value of `targetType` from `viewValue`, projecting the
- view's element type back to the slot pointer operand's element type.
- Mem2Reg calls this when a store on the view needs to update the
- slot value at the slot pointer operand's element type.
-
- `reachingDef` is the slot value at `targetType` immediately before
- the store. For full views it can be ignored; for partial subviews
- (e.g. one field of an aggregate) the result is built by inserting
- `viewValue` into `reachingDef`.
+ Constructs a value of `targetType` from the given `viewValue`, projecting
+ the view's element type back to the element type of the slot pointer
+ operand. Mem2Reg invokes this method when a store operation on the view
+ needs to update the slot value using the element type of the slot
+ pointer operand.
+
+ The `reachingDef` parameter represents the slot value at `targetType`
+ immediately preceding the store. While it can be ignored for full views,
+ for partial subviews (such as a single field of an aggregate), the
+ result is constructed by inserting `viewValue` into `reachingDef`.
}],
"::mlir::Value",
"projectViewValueToSlotValue",
diff --git a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
index 910bd2268ca78..2a322dd3f74a5 100644
--- a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
+++ b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
@@ -57,7 +57,7 @@ static bool walkPromotableSlotViewChain(Value value, const MemorySlot &rootSlot,
aliasElemType = info->view.elemType;
chainOut.push_back(ViewStep{aliaser, /*inputElemType=*/Type{},
/*outputElemType=*/info->view.elemType});
- current = info->slotPointerOperand;
+ current = info->aliasedSlotPointerOperand;
}
// Fill in each step's `inputElemType` from the previous step's output
@@ -116,8 +116,8 @@ Value mlir::convertViewValueToSlotValue(Value viewValue, Value viewPtr,
// Project `rootReachingDef` down to each step's input level so the
// per-step projector can use it (needed for partial subviews; full views
- // ignore it). The chain is leaf-first, so `chain.back()` is root-most
- // (its input is `rootSlot.elemType`) and `chain.front()` is leaf-most.
+ // ignore it). The chain is leaf-first, so `chain.back()` is the step nearest
+ // the root slot and `chain.front()` is the step producing the leaf view.
SmallVector<Value> perStepReachingDef(chain.size());
Value current = rootReachingDef;
for (int i = static_cast<int>(chain.size()) - 1; i >= 0; --i) {
>From 38fde8f3ee2607eea76667bb79cbd292eaa35dce Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Wed, 13 May 2026 04:24:01 -0700
Subject: [PATCH 05/12] use walkSlice
---
mlir/lib/Interfaces/CMakeLists.txt | 16 ++++++++++-
mlir/lib/Interfaces/MemorySlotInterfaces.cpp | 30 +++++++++-----------
2 files changed, 29 insertions(+), 17 deletions(-)
diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index 41e890cb408ba..bb3a33117b912 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -100,7 +100,21 @@ add_mlir_library(MLIRLoopLikeInterface
)
add_mlir_interface_library(MemOpInterfaces)
-add_mlir_interface_library(MemorySlotInterfaces)
+
+add_mlir_library(MLIRMemorySlotInterfaces
+ MemorySlotInterfaces.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces
+
+ DEPENDS
+ MLIRMemorySlotInterfacesIncGen
+
+ LINK_LIBS PUBLIC
+ MLIRAnalysis
+ MLIRIR
+)
+
add_mlir_interface_library(ParallelCombiningOpInterface)
add_mlir_interface_library(RuntimeVerifiableOpInterface)
add_mlir_interface_library(ShapedOpInterfaces)
diff --git a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
index 2a322dd3f74a5..e2276fe879d9c 100644
--- a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
+++ b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
@@ -8,7 +8,7 @@
#include "mlir/Interfaces/MemorySlotInterfaces.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "mlir/Analysis/SliceWalk.h"
#include "llvm/ADT/SmallVector.h"
#include "mlir/Interfaces/MemorySlotOpInterfaces.cpp.inc"
@@ -34,31 +34,29 @@ struct ViewStep {
static bool walkPromotableSlotViewChain(Value value, const MemorySlot &rootSlot,
SmallVectorImpl<ViewStep> &chainOut,
Type *outViewElemType) {
- if (value == rootSlot.ptr) {
- if (outViewElemType)
- *outViewElemType = rootSlot.elemType;
- return true;
- }
-
- Value current = value;
Type aliasElemType{};
- llvm::SmallPtrSet<Value, 4> seen;
- while (current != rootSlot.ptr) {
- if (!seen.insert(current).second)
- return false;
+ bool reachedRoot = false;
+ WalkContinuation result = walkSlice(value, [&](Value current) {
+ if (current == rootSlot.ptr) {
+ reachedRoot = true;
+ return WalkContinuation::skip();
+ }
auto aliaser =
dyn_cast_or_null<PromotableAliaserInterface>(current.getDefiningOp());
if (!aliaser)
- return false;
+ return WalkContinuation::interrupt();
std::optional<PromotableSlotView> info = aliaser.getPromotableSlotView();
if (!info || info->view.ptr != current)
- return false;
+ return WalkContinuation::interrupt();
if (!aliasElemType)
aliasElemType = info->view.elemType;
chainOut.push_back(ViewStep{aliaser, /*inputElemType=*/Type{},
/*outputElemType=*/info->view.elemType});
- current = info->aliasedSlotPointerOperand;
- }
+ return WalkContinuation::advanceTo(info->aliasedSlotPointerOperand);
+ });
+
+ if (result.wasInterrupted() || !reachedRoot)
+ return false;
// Fill in each step's `inputElemType` from the previous step's output
// (or `rootSlot.elemType` for the root-most step).
>From 4c727f1e3220fe4663e7c49df05f0ce35c44cad2 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Tue, 19 May 2026 05:44:59 -0700
Subject: [PATCH 06/12] update new API to be more generic
---
.../mlir/Interfaces/MemorySlotInterfaces.h | 92 +++++----
.../mlir/Interfaces/MemorySlotInterfaces.td | 82 ++++----
mlir/lib/Interfaces/CMakeLists.txt | 16 +-
mlir/lib/Interfaces/MemorySlotInterfaces.cpp | 191 ++++++++++--------
mlir/lib/Transforms/Mem2Reg.cpp | 85 +++++---
mlir/test/Transforms/mem2reg.mlir | 90 ++++-----
mlir/test/lib/Dialect/Test/TestOpDefs.cpp | 57 +++---
mlir/test/lib/Dialect/Test/TestOps.td | 21 +-
8 files changed, 336 insertions(+), 298 deletions(-)
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
index f549193a75a05..ad3f5a2c9113f 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
@@ -13,6 +13,8 @@
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/PatternMatch.h"
+#include "llvm/ADT/DenseMap.h"
+
namespace mlir {
/// Represents a slot in memory. This is generated by an allocating operation
@@ -30,16 +32,6 @@ struct DestructurableMemorySlot : public MemorySlot {
DenseMap<Attribute, Type> subelementTypes;
};
-/// Represent a memory slot view produced by a `PromotableAliaserInterface`
-/// operation.
-struct PromotableSlotView {
- /// The slot pointer operand that is aliased by the view.
- Value aliasedSlotPointerOperand;
- /// The MemorySlot created by the operation that aliases the operand
- /// MemorySlot.
- MemorySlot view;
-};
-
/// Returned by operation promotion logic requesting the deletion of an
/// operation.
enum class DeletionKind {
@@ -56,35 +48,59 @@ enum class DeletionKind {
namespace mlir {
-/// Returns true if `value` is `rootSlot.ptr` or a transitive view of it.
-/// If so, writes the element type of the alias to `*outViewElemType` (which
-/// defaults to `rootSlot.elemType` for the root pointer).
-bool isPromotableSlotView(Value value, const MemorySlot &rootSlot,
- Type *outViewElemType = nullptr);
-
-/// Returns a `MemorySlot` representing the operand of `op` that is a view of
-/// `rootSlot.ptr`, tailored with the view's element type. Returns `nullopt`
-/// if no operand is a view of `rootSlot`.
-std::optional<MemorySlot> getOpViewSlot(Operation *op,
- const MemorySlot &rootSlot);
-
-/// Projects `slotValue` (of `rootSlot.elemType`) down to the element type of
-/// `viewPtr` by chaining `projectSlotValueToViewValue` calls along the alias
-/// chain, from the original slot down to the view pointer. Returns a null
+/// An entry in a `PromotableAliasMap`: the memory slot exposed by an aliaser
+/// operation, along with the operand it aliases from.
+struct PromotableSlotAliasInfo {
+ /// The slot exposed by the aliaser (its `ptr` is a result of the aliaser
+ /// and equals the map key).
+ MemorySlot slot;
+ /// The aliaser operand whose value is the parent slot's pointer.
+ OpOperand *aliasedSlotPointerOperand;
+};
+
+/// Maps an alias value (a result of a `PromotableAliaserInterface` op)
+/// reachable from a root slot to its `PromotableSlotAliasInfo`.
+using PromotableAliasMap =
+ llvm::SmallDenseMap<Value, PromotableSlotAliasInfo, 4>;
+
+/// Populates `aliasMap` with alias entries produced by `aliaser` for operands
+/// that already alias `rootSlot`. This should be called during a forward slice
+/// traversal from `rootSlot.ptr` to ensure topological ordering.
+void populatePromotableAliasMap(PromotableAliaserInterface aliaser,
+ const MemorySlot &rootSlot,
+ PromotableAliasMap &aliasMap);
+
+/// Returns a `MemorySlot` representing the operand of `op` that aliases
+/// `rootSlot.ptr`, using the alias's element type. Returns `nullopt` if no
+/// operand is an alias of `rootSlot`.
+std::optional<MemorySlot> getOpAliasSlot(Operation *op,
+ const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap);
+
+/// Returns true if at most one of `op`'s operands aliases `rootSlot`.
+/// This is useful to guard `getOpAliasSlot` calls, as operations reaching
+/// the root through multiple distinct aliases (e.g., memcpy between aliases)
+/// cannot be handled by interfaces expecting a single slot.
+bool isUsingAtMostOneSlotAlias(Operation *op, const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap);
+
+/// Projects `slotValue` down to the element type of `aliasPtr` by chaining
+/// `projectSlotValueToAliasValue` calls along the alias chain. Returns a null
/// value if any projection step fails.
-Value convertSlotValueToViewValue(Value slotValue, Value viewPtr,
- const MemorySlot &rootSlot,
- OpBuilder &builder);
-
-/// Projects `viewValue` back up to `rootSlot.elemType` by chaining
-/// `projectViewValueToSlotValue` calls backwards along the alias chain, from
-/// the view pointer up to the original slot. `rootReachingDef` provides the
-/// current slot value, which is projected down at each step to supply the
-/// required reaching definition (e.g., for partial subviews).
-Value convertViewValueToSlotValue(Value viewValue, Value viewPtr,
- Value rootReachingDef,
- const MemorySlot &rootSlot,
- OpBuilder &builder);
+Value convertSlotValueToAliasValue(Value slotValue, Value aliasPtr,
+ const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap,
+ OpBuilder &builder);
+
+/// Projects `aliasValue` back up to `rootSlot.elemType` by chaining
+/// `projectAliasValueToSlotValue` calls backwards along the alias chain.
+/// `rootReachingDef` provides the current slot value, which is projected
+/// down at each step to supply the required reaching definition.
+Value convertAliasValueToSlotValue(Value aliasValue, Value aliasPtr,
+ Value rootReachingDef,
+ const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap,
+ OpBuilder &builder);
} // namespace mlir
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
index 85bf800d91e5c..cfb914c798729 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td
@@ -292,65 +292,59 @@ def PromotableAliaserInterface : OpInterface<"PromotableAliaserInterface"> {
let methods = [
InterfaceMethod<[{
- Returns a view of a parent memory slot (accessed via
- `aliasedSlotPointerOperand`) as a new memory slot. The pointer to this
- new slot must be a result of the current operation, and its element
- type may differ from the parent's.
-
- Providing a view requires implementing the two projection methods below
- to bridge values between the parent's and the new slot's element types.
- One method extracts the new slot's value from the parent's value, while
- the other reconstructs the parent's value after a store to the new slot.
- If these projections cannot be performed, this method should return
- `std::nullopt`.
+ Populates `newMemorySlots` with the memory slots this operation
+ exposes by aliasing `parentSlot` (accessed via
+ `aliasedSlotPointerOperand`). Each new slot's pointer must be a
+ result of this operation, and its element type may differ from the
+ parent's. Leave the vector empty if no alias is exposed. An operation
+ can expose multiple aliases for the same parent slot.
+
+ `parentSlot` is provided so that aliasers using opaque pointers can
+ derive the new slot's element type from `parentSlot.elemType`.
+
+ Exposing an alias requires implementing the two projection methods
+ below to bridge values between the parent and new slot element types.
+ If these projections cannot be performed, leave `newMemorySlots` empty.
No IR mutation is allowed in this method.
}],
- "::std::optional<::mlir::PromotableSlotView>",
- "getPromotableSlotView",
- (ins)
+ "void",
+ "getPromotableSlotAliases",
+ (ins "::mlir::OpOperand &":$aliasedSlotPointerOperand,
+ "const ::mlir::MemorySlot &":$parentSlot,
+ "::llvm::SmallVectorImpl<::mlir::MemorySlot> &":$newMemorySlots)
>,
InterfaceMethod<[{
- Constructs a value of `targetType` from the given `value`, projecting
- the element type of the slot pointer operand down to the element type
- of the view. Mem2Reg invokes this method when a load operation on the
- view requires the slot value to be materialized with the view's
+ Extracts the value of `aliasSlot` from `slotValue` (the value of
+ `parentSlot`). Mem2Reg invokes this method when a load on the new slot
+ requires the parent's value to be materialized with the new slot's
element type.
}],
"::mlir::Value",
- "projectSlotValueToViewValue",
- (ins "::mlir::Value":$value,
- "::mlir::Type":$targetType,
+ "projectSlotValueToAliasValue",
+ (ins "::mlir::OpOperand &":$aliasedSlotPointerOperand,
+ "const ::mlir::MemorySlot &":$parentSlot,
+ "const ::mlir::MemorySlot &":$aliasSlot,
+ "::mlir::Value":$slotValue,
"::mlir::OpBuilder &":$builder), [{}],
- [{
- if (value.getType() == targetType)
- return value;
- return ::mlir::Value{};
- }]
+ [{ return slotValue; }]
>,
InterfaceMethod<[{
- Constructs a value of `targetType` from the given `viewValue`, projecting
- the view's element type back to the element type of the slot pointer
- operand. Mem2Reg invokes this method when a store operation on the view
- needs to update the slot value using the element type of the slot
- pointer operand.
-
- The `reachingDef` parameter represents the slot value at `targetType`
- immediately preceding the store. While it can be ignored for full views,
- for partial subviews (such as a single field of an aggregate), the
- result is constructed by inserting `viewValue` into `reachingDef`.
+ Reconstructs the value of `parentSlot` from `aliasValue` (a store to
+ `aliasSlot`) and `reachingDef` (the parent slot's value immediately
+ preceding the store). For full aliases, `reachingDef` can be ignored.
+ For partial sub-aliases, it allows the result to be constructed by
+ inserting `aliasValue` into `reachingDef`.
}],
"::mlir::Value",
- "projectViewValueToSlotValue",
- (ins "::mlir::Value":$viewValue,
- "::mlir::Type":$targetType,
+ "projectAliasValueToSlotValue",
+ (ins "::mlir::OpOperand &":$aliasedSlotPointerOperand,
+ "const ::mlir::MemorySlot &":$parentSlot,
+ "const ::mlir::MemorySlot &":$aliasSlot,
+ "::mlir::Value":$aliasValue,
"::mlir::Value":$reachingDef,
"::mlir::OpBuilder &":$builder), [{}],
- [{
- if (viewValue.getType() == targetType)
- return viewValue;
- return ::mlir::Value{};
- }]
+ [{ return aliasValue; }]
>,
];
}
diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index bb3a33117b912..41e890cb408ba 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -100,21 +100,7 @@ add_mlir_library(MLIRLoopLikeInterface
)
add_mlir_interface_library(MemOpInterfaces)
-
-add_mlir_library(MLIRMemorySlotInterfaces
- MemorySlotInterfaces.cpp
-
- ADDITIONAL_HEADER_DIRS
- ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces
-
- DEPENDS
- MLIRMemorySlotInterfacesIncGen
-
- LINK_LIBS PUBLIC
- MLIRAnalysis
- MLIRIR
-)
-
+add_mlir_interface_library(MemorySlotInterfaces)
add_mlir_interface_library(ParallelCombiningOpInterface)
add_mlir_interface_library(RuntimeVerifiableOpInterface)
add_mlir_interface_library(ShapedOpInterfaces)
diff --git a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
index e2276fe879d9c..7cd8e96d892de 100644
--- a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
+++ b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
@@ -8,7 +8,6 @@
#include "mlir/Interfaces/MemorySlotInterfaces.h"
-#include "mlir/Analysis/SliceWalk.h"
#include "llvm/ADT/SmallVector.h"
#include "mlir/Interfaces/MemorySlotOpInterfaces.cpp.inc"
@@ -16,122 +15,144 @@
using namespace mlir;
-namespace {
-/// One step in a view chain, leaf-first. `inputElemType` is the elem type
-/// of the slot one step closer to root; `outputElemType` is the elem type
-/// this step exposes.
-struct ViewStep {
- PromotableAliaserInterface view;
- Type inputElemType;
- Type outputElemType;
-};
-} // namespace
+/// Returns the slot describing `aliasPtr`: `rootSlot` if it is the root,
+/// the entry in `aliasMap` if it's a known alias, or `nullopt` otherwise.
+static std::optional<MemorySlot>
+getParentSlot(Value aliasPtr, const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap) {
+ if (aliasPtr == rootSlot.ptr)
+ return rootSlot;
+ auto it = aliasMap.find(aliasPtr);
+ if (it == aliasMap.end())
+ return std::nullopt;
+ return it->second.slot;
+}
-/// Walks back from `value` to `rootSlot.ptr` along
-/// `getPromotableSlotView` chains. On success, populates `chainOut` with
-/// the view ops leaf-to-root and writes the type at which `value` aliases
-/// the underlying slot to `*outViewElemType`.
-static bool walkPromotableSlotViewChain(Value value, const MemorySlot &rootSlot,
- SmallVectorImpl<ViewStep> &chainOut,
- Type *outViewElemType) {
- Type aliasElemType{};
- bool reachedRoot = false;
- WalkContinuation result = walkSlice(value, [&](Value current) {
- if (current == rootSlot.ptr) {
- reachedRoot = true;
- return WalkContinuation::skip();
- }
- auto aliaser =
- dyn_cast_or_null<PromotableAliaserInterface>(current.getDefiningOp());
- if (!aliaser)
- return WalkContinuation::interrupt();
- std::optional<PromotableSlotView> info = aliaser.getPromotableSlotView();
- if (!info || info->view.ptr != current)
- return WalkContinuation::interrupt();
- if (!aliasElemType)
- aliasElemType = info->view.elemType;
- chainOut.push_back(ViewStep{aliaser, /*inputElemType=*/Type{},
- /*outputElemType=*/info->view.elemType});
- return WalkContinuation::advanceTo(info->aliasedSlotPointerOperand);
- });
+void mlir::populatePromotableAliasMap(PromotableAliaserInterface aliaser,
+ const MemorySlot &rootSlot,
+ PromotableAliasMap &aliasMap) {
+ for (OpOperand &operand : aliaser->getOpOperands()) {
+ std::optional<MemorySlot> parentSlot =
+ getParentSlot(operand.get(), rootSlot, aliasMap);
+ if (!parentSlot)
+ continue;
+ SmallVector<MemorySlot, 2> newSlots;
+ aliaser.getPromotableSlotAliases(operand, *parentSlot, newSlots);
+ for (const MemorySlot &alias : newSlots)
+ aliasMap.try_emplace(alias.ptr, PromotableSlotAliasInfo{alias, &operand});
+ }
+}
- if (result.wasInterrupted() || !reachedRoot)
- return false;
+std::optional<MemorySlot>
+mlir::getOpAliasSlot(Operation *op, const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap) {
+ for (Value operand : op->getOperands())
+ if (std::optional<MemorySlot> slot =
+ getParentSlot(operand, rootSlot, aliasMap))
+ return slot;
+ return std::nullopt;
+}
- // Fill in each step's `inputElemType` from the previous step's output
- // (or `rootSlot.elemType` for the root-most step).
- Type prevOutput = rootSlot.elemType;
- for (ViewStep &step : llvm::reverse(chainOut)) {
- step.inputElemType = prevOutput;
- prevOutput = step.outputElemType;
+bool mlir::isUsingAtMostOneSlotAlias(Operation *op, const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap) {
+ Value uniqueAliasPtr;
+ for (Value operand : op->getOperands()) {
+ std::optional<MemorySlot> slot = getParentSlot(operand, rootSlot, aliasMap);
+ if (!slot)
+ continue;
+ if (uniqueAliasPtr && uniqueAliasPtr != slot->ptr)
+ return false;
+ uniqueAliasPtr = slot->ptr;
}
-
- if (outViewElemType)
- *outViewElemType = aliasElemType ? aliasElemType : rootSlot.elemType;
return true;
}
-bool mlir::isPromotableSlotView(Value value, const MemorySlot &rootSlot,
- Type *outViewElemType) {
- SmallVector<ViewStep> chain;
- return walkPromotableSlotViewChain(value, rootSlot, chain, outViewElemType);
-}
+namespace {
+/// A step in an alias chain, from leaf to root. `parentSlot` is one step
+/// closer to the root; `aliasSlot` is the slot exposed at this step.
+struct ChainStep {
+ PromotableAliaserInterface aliaser;
+ OpOperand *aliasedSlotPointerOperand;
+ MemorySlot parentSlot;
+ MemorySlot aliasSlot;
+};
+} // namespace
-std::optional<MemorySlot> mlir::getOpViewSlot(Operation *op,
- const MemorySlot &rootSlot) {
- for (Value operand : op->getOperands()) {
- Type viewElemType;
- if (isPromotableSlotView(operand, rootSlot, &viewElemType))
- return MemorySlot{operand, viewElemType};
+/// Walks back from `aliasPtr` to `rootSlot.ptr` through `aliasMap`,
+/// populating `chainOut` from leaf to root. Returns false if `aliasPtr`
+/// is not a known alias of `rootSlot`.
+static bool buildAliasChain(Value aliasPtr, const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap,
+ SmallVectorImpl<ChainStep> &chainOut) {
+ if (aliasPtr == rootSlot.ptr)
+ return true;
+ Value current = aliasPtr;
+ while (current != rootSlot.ptr) {
+ auto it = aliasMap.find(current);
+ if (it == aliasMap.end())
+ return false;
+ OpOperand *operand = it->second.aliasedSlotPointerOperand;
+ auto aliaser = cast<PromotableAliaserInterface>(operand->getOwner());
+ std::optional<MemorySlot> parent =
+ getParentSlot(operand->get(), rootSlot, aliasMap);
+ if (!parent)
+ return false;
+ chainOut.push_back(ChainStep{aliaser, operand, *parent, it->second.slot});
+ current = operand->get();
}
- return std::nullopt;
+ return true;
}
-Value mlir::convertSlotValueToViewValue(Value slotValue, Value viewPtr,
- const MemorySlot &rootSlot,
- OpBuilder &builder) {
- SmallVector<ViewStep> chain;
- if (!walkPromotableSlotViewChain(viewPtr, rootSlot, chain, /*out=*/nullptr))
+Value mlir::convertSlotValueToAliasValue(Value slotValue, Value aliasPtr,
+ const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap,
+ OpBuilder &builder) {
+ SmallVector<ChainStep> chain;
+ if (!buildAliasChain(aliasPtr, rootSlot, aliasMap, chain))
return {};
Value current = slotValue;
// Root-to-leaf walk: reverse the leaf-first chain.
- for (ViewStep &step : llvm::reverse(chain)) {
- current = step.view.projectSlotValueToViewValue(
- current, step.outputElemType, builder);
+ for (ChainStep &step : llvm::reverse(chain)) {
+ current = step.aliaser.projectSlotValueToAliasValue(
+ *step.aliasedSlotPointerOperand, step.parentSlot, step.aliasSlot,
+ current, builder);
if (!current)
return {};
}
return current;
}
-Value mlir::convertViewValueToSlotValue(Value viewValue, Value viewPtr,
- Value rootReachingDef,
- const MemorySlot &rootSlot,
- OpBuilder &builder) {
- SmallVector<ViewStep> chain;
- if (!walkPromotableSlotViewChain(viewPtr, rootSlot, chain, /*out=*/nullptr))
+Value mlir::convertAliasValueToSlotValue(Value aliasValue, Value aliasPtr,
+ Value rootReachingDef,
+ const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap,
+ OpBuilder &builder) {
+ SmallVector<ChainStep> chain;
+ if (!buildAliasChain(aliasPtr, rootSlot, aliasMap, chain))
return {};
- // Project `rootReachingDef` down to each step's input level so the
- // per-step projector can use it (needed for partial subviews; full views
- // ignore it). The chain is leaf-first, so `chain.back()` is the root slot
- // and `chain.front()` is the leaf view.
+ // Project `rootReachingDef` down to each step's parent level so the
+ // per-step projector can use it (needed for partial sub-aliases; full
+ // aliases ignore it). The chain is leaf-first, so `chain.back()` is the
+ // root-most step (parent = rootSlot) and `chain.front()` is the leaf.
SmallVector<Value> perStepReachingDef(chain.size());
Value current = rootReachingDef;
for (int i = static_cast<int>(chain.size()) - 1; i >= 0; --i) {
perStepReachingDef[i] = current;
- current = chain[i].view.projectSlotValueToViewValue(
- current, chain[i].outputElemType, builder);
+ current = chain[i].aliaser.projectSlotValueToAliasValue(
+ *chain[i].aliasedSlotPointerOperand, chain[i].parentSlot,
+ chain[i].aliasSlot, current, builder);
if (!current)
return {};
}
- // Walk leaf-to-root, combining `viewValue` with the projected reaching
+ // Walk leaf-to-root, combining `aliasValue` with the projected reaching
// definition at each step.
- current = viewValue;
+ current = aliasValue;
for (size_t i = 0; i < chain.size(); ++i) {
- current = chain[i].view.projectViewValueToSlotValue(
- current, chain[i].inputElemType, perStepReachingDef[i], builder);
+ current = chain[i].aliaser.projectAliasValueToSlotValue(
+ *chain[i].aliasedSlotPointerOperand, chain[i].parentSlot,
+ chain[i].aliasSlot, current, perStepReachingDef[i], builder);
if (!current)
return {};
}
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
index 5cd2c890b5aaf..e4f315a47f13e 100644
--- a/mlir/lib/Transforms/Mem2Reg.cpp
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -158,6 +158,9 @@ struct MemorySlotPromotionInfo {
/// are guaranteed to be held by a PromotableRegionOpInterface, and to be
/// nested within the parent region of the slot pointer.
DenseMap<Region *, RegionPromotionInfo> regionsToPromote;
+ /// Transitive aliases of `slot.ptr` via `PromotableAliaserInterface`,
+ /// mapping alias values to their exposed slot and aliased operand.
+ PromotableAliasMap aliasMap;
};
/// Computes information for basic slot promotion. This will check that direct
@@ -184,9 +187,10 @@ class MemorySlotPromotionAnalyzer {
/// Resulting blocking uses are grouped by region.
/// This also ensures all the uses are within promotable regions, adding
/// information about regions to be promoted to the `regionsToPromote` map.
- LogicalResult computeBlockingUses(
- RegionBlockingUsesMap &userToBlockingUses,
- DenseMap<Region *, RegionPromotionInfo> &regionsToPromote);
+ LogicalResult
+ computeBlockingUses(RegionBlockingUsesMap &userToBlockingUses,
+ DenseMap<Region *, RegionPromotionInfo> &regionsToPromote,
+ PromotableAliasMap &aliasMap);
/// Computes the points in the provided region where multiple re-definitions
/// of the slot's value (stores) may conflict.
@@ -344,7 +348,8 @@ Value MemorySlotPromoter::getOrCreateDefaultValue() {
LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
RegionBlockingUsesMap &userToBlockingUses,
- DenseMap<Region *, RegionPromotionInfo> &regionsToPromote) {
+ DenseMap<Region *, RegionPromotionInfo> &regionsToPromote,
+ PromotableAliasMap &aliasMap) {
// The promotion of an operation may require the promotion of further
// operations (typically, removing operations that use an operation that must
// delete itself). We thus need to start from the use of the slot pointer and
@@ -389,6 +394,10 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
if (it == blockingUsesMap.end())
continue;
+ // Populate the alias map for alias-exposing ops.
+ if (auto aliaser = dyn_cast<PromotableAliaserInterface>(user))
+ populatePromotableAliasMap(aliaser, slot, aliasMap);
+
SmallPtrSet<OpOperand *, 4> &blockingUses = it->second;
SmallVector<OpOperand *> newBlockingUses;
@@ -400,15 +409,22 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
return failure();
regionsWithDirectUse.insert(user->getParentRegion());
} else if (auto promotable = dyn_cast<PromotableMemOpInterface>(user)) {
- MemorySlot viewSlot = getOpViewSlot(user, slot).value_or(slot);
- if (!promotable.canUsesBeRemoved(viewSlot, blockingUses, newBlockingUses,
+ // If the memop reaches the root slot through multiple distinct alias
+ // operands, promotion fails. `PromotableMemOpInterface` currently
+ // expects a single slot per call. Supporting multiple aliases would
+ // require extending the interface.
+ if (!isUsingAtMostOneSlotAlias(user, slot, aliasMap))
+ return failure();
+ MemorySlot aliasSlot =
+ getOpAliasSlot(user, slot, aliasMap).value_or(slot);
+ if (!promotable.canUsesBeRemoved(aliasSlot, blockingUses, newBlockingUses,
dataLayout))
return failure();
// Operations that interact with the slot's memory will be promoted using
// a reaching definition. Therefore, the operation must be within a region
// where the reaching definition can be computed.
- if (promotable.storesTo(viewSlot))
+ if (promotable.storesTo(aliasSlot))
regionsWithDirectStore.insert(user->getParentRegion());
else
regionsWithDirectUse.insert(user->getParentRegion());
@@ -510,8 +526,8 @@ MemorySlotPromotionAnalyzer::computeInfo() {
// cannot find a way to resolve their blocking uses, we abort the promotion.
// We also compute at this stage the regions that will be analyzed for
// reaching definition information.
- if (failed(
- computeBlockingUses(info.userToBlockingUses, info.regionsToPromote)))
+ if (failed(computeBlockingUses(info.userToBlockingUses, info.regionsToPromote,
+ info.aliasMap)))
return {};
// Compute the blocks containing a store for each region, either directly or
@@ -519,13 +535,14 @@ MemorySlotPromotionAnalyzer::computeInfo() {
// all regions with at least one store.
//
// Iterating `info.userToBlockingUses` lets this also pick up stores that
- // reach the slot through chains of views (`getPromotableSlotView`).
+ // reach the slot through chains of aliases (`getPromotableSlotAliases`).
DenseMap<Region *, SmallPtrSet<Block *, 16>> definingBlocks;
for (auto &[region, opsMap] : info.userToBlockingUses)
for (auto &[user, _blockingUses] : opsMap)
if (auto storeOp = dyn_cast<PromotableMemOpInterface>(user)) {
- MemorySlot viewSlot = getOpViewSlot(user, slot).value_or(slot);
- if (storeOp.storesTo(viewSlot))
+ MemorySlot aliasSlot =
+ getOpAliasSlot(user, slot, info.aliasMap).value_or(slot);
+ if (storeOp.storesTo(aliasSlot))
definingBlocks[region].insert(user->getBlock());
}
for (auto &[region, regionInfo] : info.regionsToPromote)
@@ -558,8 +575,9 @@ Value MemorySlotPromoter::promoteInBlock(Block *block, Value reachingDef) {
if (info.userToBlockingUses[memOp->getParentRegion()].contains(memOp))
reachingDefs.insert({memOp, reachingDef});
- MemorySlot viewSlot = getOpViewSlot(memOp, slot).value_or(slot);
- if (memOp.storesTo(viewSlot)) {
+ MemorySlot aliasSlot =
+ getOpAliasSlot(memOp, slot, info.aliasMap).value_or(slot);
+ if (memOp.storesTo(aliasSlot)) {
builder.setInsertionPointAfter(memOp);
// To not expose default value creation to the interfaces, if we have
// no reaching definition by now, we set it to the default value.
@@ -567,27 +585,27 @@ Value MemorySlotPromoter::promoteInBlock(Block *block, Value reachingDef) {
if (!reachingDef)
reachingDef = getOrCreateDefaultValue();
Value reachingDefAtStore = reachingDef;
- if (slot.ptr != viewSlot.ptr) {
- // The store sees the slot at `viewSlot.elemType`; project the
+ if (slot.ptr != aliasSlot.ptr) {
+ // The store sees the slot at `aliasSlot.elemType`; project the
// reaching definition (at root elem type) before handing it to
// `getStored`.
- reachingDefAtStore = convertSlotValueToViewValue(
- reachingDef, viewSlot.ptr, slot, builder);
+ reachingDefAtStore = convertSlotValueToAliasValue(
+ reachingDef, aliasSlot.ptr, slot, info.aliasMap, builder);
assert(reachingDefAtStore &&
- "projectSlotValueToViewValue contract violation");
+ "projectSlotValueToAliasValue contract violation");
}
Value stored =
- memOp.getStored(viewSlot, builder, reachingDefAtStore, dataLayout);
+ memOp.getStored(aliasSlot, builder, reachingDefAtStore, dataLayout);
assert(stored && "a memory operation storing to a slot must provide a "
"new definition of the slot");
- // `replacedValuesMap` keeps `stored` at `viewSlot.elemType` for
+ // `replacedValuesMap` keeps `stored` at `aliasSlot.elemType` for
// `visitReplacedValues`; the new reaching definition is tracked at
// the root slot's elem type, so project `stored` back.
replacedValuesMap[memOp] = stored;
- if (viewSlot.ptr != slot.ptr) {
- stored = convertViewValueToSlotValue(stored, viewSlot.ptr,
- reachingDef, slot, builder);
- assert(stored && "projectViewValueToSlotValue contract violation");
+ if (aliasSlot.ptr != slot.ptr) {
+ stored = convertAliasValueToSlotValue(
+ stored, aliasSlot.ptr, reachingDef, slot, info.aliasMap, builder);
+ assert(stored && "projectAliasValueToSlotValue contract violation");
}
reachingDef = stored;
}
@@ -791,21 +809,22 @@ void MemorySlotPromoter::removeBlockingUses(Region *region) {
reachingDef = getOrCreateDefaultValue();
builder.setInsertionPointAfter(toPromote);
- MemorySlot viewSlot = getOpViewSlot(toPromote, slot).value_or(slot);
+ MemorySlot aliasSlot =
+ getOpAliasSlot(toPromote, slot, info.aliasMap).value_or(slot);
Value reachingDefAtBlockingUse = reachingDef;
- if (viewSlot.ptr != slot.ptr) {
- // Project the reaching definition to `viewSlot.elemType` to match
+ if (aliasSlot.ptr != slot.ptr) {
+ // Project the reaching definition to `aliasSlot.elemType` to match
// what `toPromoteMemOp` sees.
- reachingDefAtBlockingUse = convertSlotValueToViewValue(
- reachingDef, viewSlot.ptr, slot, builder);
+ reachingDefAtBlockingUse = convertSlotValueToAliasValue(
+ reachingDef, aliasSlot.ptr, slot, info.aliasMap, builder);
assert(reachingDefAtBlockingUse &&
- "projectSlotValueToViewValue contract violation");
+ "projectSlotValueToAliasValue contract violation");
}
if (toPromoteMemOp.removeBlockingUses(
- viewSlot, blockingUsesMap[toPromote], builder,
+ aliasSlot, blockingUsesMap[toPromote], builder,
reachingDefAtBlockingUse, dataLayout) == DeletionKind::Delete)
toErase.insert(toPromote);
- if (toPromoteMemOp.storesTo(viewSlot))
+ if (toPromoteMemOp.storesTo(aliasSlot))
if (Value replacedValue = replacedValuesMap[toPromoteMemOp])
replacedValues.push_back({toPromoteMemOp, replacedValue});
continue;
diff --git a/mlir/test/Transforms/mem2reg.mlir b/mlir/test/Transforms/mem2reg.mlir
index 064669534daa5..79190f5fc200e 100644
--- a/mlir/test/Transforms/mem2reg.mlir
+++ b/mlir/test/Transforms/mem2reg.mlir
@@ -184,74 +184,69 @@ func.func @poison_insertion_point(%val: f64) {
// -----
-// Verifies that mem2reg promotes a memory slot whose stores and loads are
-// reached through a transparent view operation that exposes itself via
-// PromotableAliaserInterface::getPromotableSlotView. The conditional store
-// on the view in ^bb1 must be discovered as a defining block, otherwise
-// the merge point at ^bb2 would not get a block argument and the promotion
-// would silently drop the conditional update.
-
-// CHECK-LABEL: func.func @promotable_through_view
+// Verifies that mem2reg promotes a memory slot accessed through a transparent
+// alias operation exposing itself via `getPromotableSlotAliases`. The
+// conditional store on the alias in ^bb1 must be discovered as a defining
+// block; otherwise, the merge point at ^bb2 would lack a block argument,
+// silently dropping the conditional update.
+
+// CHECK-LABEL: func.func @promotable_through_alias
// CHECK-SAME: (%[[A:.*]]: i32, %[[COND:.*]]: i1) -> i32
// CHECK-NOT: test.multi_slot_alloca
-// CHECK-NOT: test.transparent_view
+// CHECK-NOT: test.transparent_alias
// CHECK: %[[C42:.*]] = arith.constant 42 : i32
// CHECK: cf.cond_br %[[COND]], ^[[BB1:.*]], ^[[BB2:.*]](%[[C42]] : i32)
// CHECK: ^[[BB1]]:
// CHECK: cf.br ^[[BB2]](%[[A]] : i32)
// CHECK: ^[[BB2]](%[[MERGE:.*]]: i32):
// CHECK: return %[[MERGE]] : i32
-func.func @promotable_through_view(%a: i32, %cond: i1) -> i32 {
+func.func @promotable_through_alias(%a: i32, %cond: i1) -> i32 {
%c42 = arith.constant 42 : i32
%slot = test.multi_slot_alloca : () -> memref<i32>
- %view = test.transparent_view %slot : (memref<i32>) -> memref<i32>
- memref.store %c42, %view[] : memref<i32>
+ %alias = test.transparent_alias %slot : (memref<i32>) -> memref<i32>
+ memref.store %c42, %alias[] : memref<i32>
cf.cond_br %cond, ^bb1, ^bb2
^bb1:
- memref.store %a, %view[] : memref<i32>
+ memref.store %a, %alias[] : memref<i32>
cf.br ^bb2
^bb2:
- %v = memref.load %view[] : memref<i32>
+ %v = memref.load %alias[] : memref<i32>
return %v : i32
}
// -----
-// Type-changing transparent view: the store and load see the slot at f32
-// while the underlying allocation is at i32. mem2reg materialises an
-// `unrealized_conversion_cast` (the view op's `projectViewValueToSlotValue`)
-// at the store (f32 → i32 to update the reaching def at the slot's elem
-// type) and at the load (i32 → f32 via `projectSlotValueToViewValue` to feed
-// the load's f32 result type).
+// Type-changing transparent alias: the store and load access the slot as f32
+// while the underlying allocation is i32. mem2reg materializes an
+// `unrealized_conversion_cast` at the store (f32 → i32 via `projectAliasValueToSlotValue`)
+// and at the load (i32 → f32 via `projectSlotValueToAliasValue`).
-// CHECK-LABEL: func.func @promotable_through_cast_view
+// CHECK-LABEL: func.func @promotable_through_cast_alias
// CHECK-SAME: (%[[A:.*]]: f32) -> f32
// CHECK-NOT: test.multi_slot_alloca
-// CHECK-NOT: test.transparent_cast_view
+// CHECK-NOT: test.transparent_cast_alias
// CHECK: %[[I32:.*]] = builtin.unrealized_conversion_cast %[[A]] : f32 to i32
// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[I32]] : i32 to f32
// CHECK: return %{{.*}} : f32
-func.func @promotable_through_cast_view(%a: f32) -> f32 {
+func.func @promotable_through_cast_alias(%a: f32) -> f32 {
%slot = test.multi_slot_alloca : () -> memref<i32>
- %view = test.transparent_cast_view %slot : (memref<i32>) -> memref<f32>
- memref.store %a, %view[] : memref<f32>
- %v = memref.load %view[] : memref<f32>
+ %alias = test.transparent_cast_alias %slot : (memref<i32>) -> memref<f32>
+ memref.store %a, %alias[] : memref<f32>
+ %v = memref.load %alias[] : memref<f32>
return %v : f32
}
// -----
// Same as above with a conditional store across blocks. The merge-point
-// block argument is at the root slot's element type (i32), and the
-// `projectViewValueToSlotValue` casts are inserted at the store sites
-// (f32 → i32) so the merge argument can carry the conditional update; the
-// load site inserts the inverse cast (i32 → f32) for its result via
-// `projectSlotValueToViewValue`.
+// block argument uses the root slot's element type (i32). Casts are inserted
+// at the store sites (f32 → i32 via `projectAliasValueToSlotValue`) and the
+// load site (i32 → f32 via `projectSlotValueToAliasValue`).
-// CHECK-LABEL: func.func @promotable_through_cast_view_blocks
+// CHECK-LABEL: func.func @promotable_through_cast_alias_blocks
// CHECK-SAME: (%[[A:.*]]: f32, %[[COND:.*]]: i1) -> f32
// CHECK-NOT: test.multi_slot_alloca
-// CHECK-NOT: test.transparent_cast_view
+// CHECK-NOT: test.transparent_cast_alias
// CHECK: %[[CST:.*]] = arith.constant 1.000000e+00 : f32
// CHECK: %[[CST_I32:.*]] = builtin.unrealized_conversion_cast %[[CST]] : f32 to i32
// CHECK: cf.cond_br %[[COND]], ^[[BB1:.*]], ^[[BB2:.*]](%[[CST_I32]] : i32)
@@ -261,39 +256,38 @@ func.func @promotable_through_cast_view(%a: f32) -> f32 {
// CHECK: ^[[BB2]](%[[MERGE:.*]]: i32):
// CHECK: %[[MERGE_F32:.*]] = builtin.unrealized_conversion_cast %[[MERGE]] : i32 to f32
// CHECK: return %[[MERGE_F32]] : f32
-func.func @promotable_through_cast_view_blocks(%a: f32, %cond: i1) -> f32 {
+func.func @promotable_through_cast_alias_blocks(%a: f32, %cond: i1) -> f32 {
%cst = arith.constant 1.0 : f32
%slot = test.multi_slot_alloca : () -> memref<i32>
- %view = test.transparent_cast_view %slot : (memref<i32>) -> memref<f32>
- memref.store %cst, %view[] : memref<f32>
+ %alias = test.transparent_cast_alias %slot : (memref<i32>) -> memref<f32>
+ memref.store %cst, %alias[] : memref<f32>
cf.cond_br %cond, ^bb1, ^bb2
^bb1:
- memref.store %a, %view[] : memref<f32>
+ memref.store %a, %alias[] : memref<f32>
cf.br ^bb2
^bb2:
- %v = memref.load %view[] : memref<f32>
+ %v = memref.load %alias[] : memref<f32>
return %v : f32
}
// -----
-// Regression test: the view is defined in the parent region but the store
-// owning the propagated blocking use lives in a nested region (`scf.if`).
-// The new blocking use must be registered under the owner's region, otherwise
-// `removeBlockingUses` trips the "all operations must still be in the same
-// region" invariant after `scf.if` rebuilds itself in `finalizePromotion`.
+// Regression test: the alias is defined in the parent region, but the store
+// is in a nested region (`scf.if`). The new blocking use must be registered
+// under the store's region; otherwise, `removeBlockingUses` fails the region
+// invariant after `scf.if` rebuilds itself in `finalizePromotion`.
-// CHECK-LABEL: func.func @promotable_through_view_across_regions
+// CHECK-LABEL: func.func @promotable_through_alias_across_regions
// CHECK-SAME: (%[[COND:.*]]: i1, %[[A:.*]]: i32)
// CHECK-NOT: test.multi_slot_alloca
-// CHECK-NOT: test.transparent_view
+// CHECK-NOT: test.transparent_alias
// CHECK-NOT: memref.store
// CHECK: scf.if %[[COND]]
-func.func @promotable_through_view_across_regions(%cond: i1, %a: i32) {
+func.func @promotable_through_alias_across_regions(%cond: i1, %a: i32) {
%slot = test.multi_slot_alloca : () -> memref<i32>
- %view = test.transparent_view %slot : (memref<i32>) -> memref<i32>
+ %alias = test.transparent_alias %slot : (memref<i32>) -> memref<i32>
scf.if %cond {
- memref.store %a, %view[] : memref<i32>
+ memref.store %a, %alias[] : memref<i32>
}
return
}
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index 7605172b6c3bb..1c1d02145602c 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1770,15 +1770,19 @@ TestMultiSlotAlloca::handleDestructuringComplete(
}
//===----------------------------------------------------------------------===//
-// TestTransparentView
+// TestTransparentAlias
//===----------------------------------------------------------------------===//
-std::optional<PromotableSlotView> TestTransparentView::getPromotableSlotView() {
+void TestTransparentAlias::getPromotableSlotAliases(
+ OpOperand &aliasedSlotPointerOperand, const MemorySlot & /*parentSlot*/,
+ SmallVectorImpl<MemorySlot> &newMemorySlots) {
+ if (aliasedSlotPointerOperand.get() != getSource())
+ return;
Type elemType = cast<MemRefType>(getResult().getType()).getElementType();
- return PromotableSlotView{getSource(), MemorySlot{getResult(), elemType}};
+ newMemorySlots.push_back(MemorySlot{getResult(), elemType});
}
-bool TestTransparentView::canUsesBeRemoved(
+bool TestTransparentAlias::canUsesBeRemoved(
const SmallPtrSetImpl<OpOperand *> &blockingUses,
SmallVectorImpl<OpOperand *> &newBlockingUses,
const DataLayout &dataLayout) {
@@ -1787,22 +1791,25 @@ bool TestTransparentView::canUsesBeRemoved(
return true;
}
-DeletionKind TestTransparentView::removeBlockingUses(
+DeletionKind TestTransparentAlias::removeBlockingUses(
const SmallPtrSetImpl<OpOperand *> &blockingUses, OpBuilder &builder) {
return DeletionKind::Delete;
}
//===----------------------------------------------------------------------===//
-// TestTransparentCastView
+// TestTransparentCastAlias
//===----------------------------------------------------------------------===//
-std::optional<PromotableSlotView>
-TestTransparentCastView::getPromotableSlotView() {
+void TestTransparentCastAlias::getPromotableSlotAliases(
+ OpOperand &aliasedSlotPointerOperand, const MemorySlot & /*parentSlot*/,
+ SmallVectorImpl<MemorySlot> &newMemorySlots) {
+ if (aliasedSlotPointerOperand.get() != getSource())
+ return;
Type elemType = cast<MemRefType>(getResult().getType()).getElementType();
- return PromotableSlotView{getSource(), MemorySlot{getResult(), elemType}};
+ newMemorySlots.push_back(MemorySlot{getResult(), elemType});
}
-bool TestTransparentCastView::canUsesBeRemoved(
+bool TestTransparentCastAlias::canUsesBeRemoved(
const SmallPtrSetImpl<OpOperand *> &blockingUses,
SmallVectorImpl<OpOperand *> &newBlockingUses,
const DataLayout &dataLayout) {
@@ -1811,28 +1818,30 @@ bool TestTransparentCastView::canUsesBeRemoved(
return true;
}
-DeletionKind TestTransparentCastView::removeBlockingUses(
+DeletionKind TestTransparentCastAlias::removeBlockingUses(
const SmallPtrSetImpl<OpOperand *> &blockingUses, OpBuilder &builder) {
return DeletionKind::Delete;
}
-Value TestTransparentCastView::projectSlotValueToViewValue(Value value,
- Type targetType,
- OpBuilder &builder) {
- if (value.getType() == targetType)
- return value;
- return UnrealizedConversionCastOp::create(builder, getLoc(), targetType,
- value)
+Value TestTransparentCastAlias::projectSlotValueToAliasValue(
+ OpOperand & /*aliasedSlotPointerOperand*/,
+ const MemorySlot & /*parentSlot*/, const MemorySlot &aliasSlot,
+ Value slotValue, OpBuilder &builder) {
+ if (slotValue.getType() == aliasSlot.elemType)
+ return slotValue;
+ return UnrealizedConversionCastOp::create(builder, getLoc(),
+ aliasSlot.elemType, slotValue)
.getResult(0);
}
-Value TestTransparentCastView::projectViewValueToSlotValue(
- Value viewValue, Type targetType, Value /*reachingDef*/,
+Value TestTransparentCastAlias::projectAliasValueToSlotValue(
+ OpOperand & /*aliasedSlotPointerOperand*/, const MemorySlot &parentSlot,
+ const MemorySlot & /*aliasSlot*/, Value aliasValue, Value /*reachingDef*/,
OpBuilder &builder) {
- if (viewValue.getType() == targetType)
- return viewValue;
- return UnrealizedConversionCastOp::create(builder, getLoc(), targetType,
- viewValue)
+ if (aliasValue.getType() == parentSlot.elemType)
+ return aliasValue;
+ return UnrealizedConversionCastOp::create(builder, getLoc(),
+ parentSlot.elemType, aliasValue)
.getResult(0);
}
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 029d65f071e04..90d1142cdac74 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -3941,30 +3941,29 @@ def TestMultiSlotAlloca : TEST_Op<"multi_slot_alloca",
let assemblyFormat = "attr-dict `:` functional-type(operands, results)";
}
-// Same-element-type transparent view of a memref slot. Exercises the
-// view-chain handling in mem2reg with identity projections.
-def TestTransparentView : TEST_Op<"transparent_view",
+// Transparent alias of a memref slot with the same element type. Exercises
+// alias-chain handling in mem2reg with identity projections.
+def TestTransparentAlias : TEST_Op<"transparent_alias",
[DeclareOpInterfaceMethods<PromotableOpInterface,
["canUsesBeRemoved",
"removeBlockingUses"]>,
DeclareOpInterfaceMethods<PromotableAliaserInterface,
- ["getPromotableSlotView"]>]> {
+ ["getPromotableSlotAliases"]>]> {
let arguments = (ins MemRefOf<[I32]>:$source);
let results = (outs MemRefOf<[I32]>:$result);
let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
}
-// Type-changing transparent view of a memref slot. The result aliases the
-// source at a different element type; the projection methods bridge
-// between the two element types using `builtin.unrealized_conversion_cast`.
-def TestTransparentCastView : TEST_Op<"transparent_cast_view",
+// Type-changing transparent alias of a memref slot. The projection methods
+// bridge the element types using `builtin.unrealized_conversion_cast`.
+def TestTransparentCastAlias : TEST_Op<"transparent_cast_alias",
[DeclareOpInterfaceMethods<PromotableOpInterface,
["canUsesBeRemoved",
"removeBlockingUses"]>,
DeclareOpInterfaceMethods<PromotableAliaserInterface,
- ["getPromotableSlotView",
- "projectSlotValueToViewValue",
- "projectViewValueToSlotValue"]>]> {
+ ["getPromotableSlotAliases",
+ "projectSlotValueToAliasValue",
+ "projectAliasValueToSlotValue"]>]> {
let arguments = (ins MemRefOf<[I32, F32]>:$source);
let results = (outs MemRefOf<[I32, F32]>:$result);
let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
>From 99824dc277a6a6cd59d920ae06bdb1a345b7def5 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Tue, 19 May 2026 06:51:21 -0700
Subject: [PATCH 07/12] add dual aliaser test
---
mlir/test/Transforms/mem2reg.mlir | 27 +++++++++++
mlir/test/lib/Dialect/Test/TestOpDefs.cpp | 56 +++++++++++++++++++++++
mlir/test/lib/Dialect/Test/TestOps.td | 20 ++++++++
3 files changed, 103 insertions(+)
diff --git a/mlir/test/Transforms/mem2reg.mlir b/mlir/test/Transforms/mem2reg.mlir
index 79190f5fc200e..293ec23c33df4 100644
--- a/mlir/test/Transforms/mem2reg.mlir
+++ b/mlir/test/Transforms/mem2reg.mlir
@@ -291,3 +291,30 @@ func.func @promotable_through_alias_across_regions(%cond: i1, %a: i32) {
}
return
}
+
+// -----
+
+// Dual-alias case: a single aliaser op exposes two simultaneously usable
+// aliases of the same parent slot (signless i32) at different signednesses
+// (signed and unsigned i32). `getPromotableSlotAliases` populates two
+// entries for that operand, both of which end up in the alias map. The
+// store reaches the slot through the signed alias and the load reaches it
+// through the unsigned alias.
+
+// CHECK-LABEL: func.func @promotable_through_dual_alias
+// CHECK-SAME: (%[[A:.*]]: si32) -> ui32
+// CHECK-NOT: memref.alloca
+// CHECK-NOT: test.transparent_dual_alias
+// CHECK-NOT: memref.store
+// CHECK-NOT: memref.load
+// CHECK: %[[A_I32:.*]] = builtin.unrealized_conversion_cast %[[A]] : si32 to i32
+// CHECK: %[[A_UI32:.*]] = builtin.unrealized_conversion_cast %[[A_I32]] : i32 to ui32
+// CHECK: return %[[A_UI32]] : ui32
+func.func @promotable_through_dual_alias(%a: si32) -> ui32 {
+ %slot = memref.alloca() : memref<i32>
+ %signed, %unsigned = test.transparent_dual_alias %slot
+ : (memref<i32>) -> (memref<si32>, memref<ui32>)
+ memref.store %a, %signed[] : memref<si32>
+ %v = memref.load %unsigned[] : memref<ui32>
+ return %v : ui32
+}
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index 1c1d02145602c..08f96e2cb4cc0 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1845,6 +1845,62 @@ Value TestTransparentCastAlias::projectAliasValueToSlotValue(
.getResult(0);
}
+//===----------------------------------------------------------------------===//
+// TestTransparentDualAlias
+//===----------------------------------------------------------------------===//
+
+void TestTransparentDualAlias::getPromotableSlotAliases(
+ OpOperand &aliasedSlotPointerOperand, const MemorySlot & /*parentSlot*/,
+ SmallVectorImpl<MemorySlot> &newMemorySlots) {
+ if (aliasedSlotPointerOperand.get() != getSource())
+ return;
+ // Expose both results as aliases of the same parent at their own
+ // signedness (same bit width as the parent's signless i32).
+ newMemorySlots.push_back(MemorySlot{
+ getResultSigned(),
+ cast<MemRefType>(getResultSigned().getType()).getElementType()});
+ newMemorySlots.push_back(MemorySlot{
+ getResultUnsigned(),
+ cast<MemRefType>(getResultUnsigned().getType()).getElementType()});
+}
+
+bool TestTransparentDualAlias::canUsesBeRemoved(
+ const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses,
+ const DataLayout &dataLayout) {
+ for (Value result : getResults())
+ for (OpOperand &use : result.getUses())
+ newBlockingUses.push_back(&use);
+ return true;
+}
+
+DeletionKind TestTransparentDualAlias::removeBlockingUses(
+ const SmallPtrSetImpl<OpOperand *> &blockingUses, OpBuilder &builder) {
+ return DeletionKind::Delete;
+}
+
+Value TestTransparentDualAlias::projectSlotValueToAliasValue(
+ OpOperand & /*aliasedSlotPointerOperand*/,
+ const MemorySlot & /*parentSlot*/, const MemorySlot &aliasSlot,
+ Value slotValue, OpBuilder &builder) {
+ if (slotValue.getType() == aliasSlot.elemType)
+ return slotValue;
+ return UnrealizedConversionCastOp::create(builder, getLoc(),
+ aliasSlot.elemType, slotValue)
+ .getResult(0);
+}
+
+Value TestTransparentDualAlias::projectAliasValueToSlotValue(
+ OpOperand & /*aliasedSlotPointerOperand*/, const MemorySlot &parentSlot,
+ const MemorySlot & /*aliasSlot*/, Value aliasValue, Value /*reachingDef*/,
+ OpBuilder &builder) {
+ if (aliasValue.getType() == parentSlot.elemType)
+ return aliasValue;
+ return UnrealizedConversionCastOp::create(builder, getLoc(),
+ parentSlot.elemType, aliasValue)
+ .getResult(0);
+}
+
namespace {
/// Returns test dialect's memref layout for test dialect's tensor encoding when
/// applicable.
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 90d1142cdac74..688b60d48de2f 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -3969,6 +3969,26 @@ def TestTransparentCastAlias : TEST_Op<"transparent_cast_alias",
let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
}
+// Transparent alias of a memref slot exposing two simultaneously usable
+// aliases of the same bit-width at different signednesses (signed and
+// unsigned 32-bit integers, both aliasing the signless i32 storage).
+// Exercises mem2reg's handling of an aliaser whose
+// `getPromotableSlotAliases` populates more than one entry for a single
+// aliased operand.
+def TestTransparentDualAlias : TEST_Op<"transparent_dual_alias",
+ [DeclareOpInterfaceMethods<PromotableOpInterface,
+ ["canUsesBeRemoved",
+ "removeBlockingUses"]>,
+ DeclareOpInterfaceMethods<PromotableAliaserInterface,
+ ["getPromotableSlotAliases",
+ "projectSlotValueToAliasValue",
+ "projectAliasValueToSlotValue"]>]> {
+ let arguments = (ins MemRefOf<[I32]>:$source);
+ let results = (outs MemRefOf<[SI32]>:$result_signed,
+ MemRefOf<[UI32]>:$result_unsigned);
+ let assemblyFormat = "$source attr-dict `:` functional-type($source, results)";
+}
+
//===----------------------------------------------------------------------===//
// Test allocation Ops
//===----------------------------------------------------------------------===//
>From 496ef0b90f20fdcfae5a3dda86d33f7f43360036 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Thu, 21 May 2026 02:36:29 -0700
Subject: [PATCH 08/12] update getOpAliasSlot and buildAliasChain and add test
---
.../mlir/Interfaces/MemorySlotInterfaces.h | 21 +++-----
mlir/lib/Interfaces/MemorySlotInterfaces.cpp | 54 ++++++++-----------
mlir/lib/Transforms/Mem2Reg.cpp | 14 ++---
mlir/test/Transforms/mem2reg.mlir | 22 ++++++++
4 files changed, 59 insertions(+), 52 deletions(-)
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
index ad3f5a2c9113f..a709c17c30e5e 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
@@ -48,11 +48,10 @@ enum class DeletionKind {
namespace mlir {
-/// An entry in a `PromotableAliasMap`: the memory slot exposed by an aliaser
-/// operation, along with the operand it aliases from.
+/// An entry in a `PromotableAliasMap`: the memory slot defined by an aliaser
+/// operation and its source operand.
struct PromotableSlotAliasInfo {
- /// The slot exposed by the aliaser (its `ptr` is a result of the aliaser
- /// and equals the map key).
+ /// The slot defined by the aliaser (its `ptr` is the map key).
MemorySlot slot;
/// The aliaser operand whose value is the parent slot's pointer.
OpOperand *aliasedSlotPointerOperand;
@@ -70,20 +69,14 @@ void populatePromotableAliasMap(PromotableAliaserInterface aliaser,
const MemorySlot &rootSlot,
PromotableAliasMap &aliasMap);
-/// Returns a `MemorySlot` representing the operand of `op` that aliases
-/// `rootSlot.ptr`, using the alias's element type. Returns `nullopt` if no
-/// operand is an alias of `rootSlot`.
+/// Finds the memory slot accessed by `op` that aliases `rootSlot.ptr`.
+/// Returns `nullopt` if no operand aliases `rootSlot`, or if the operation
+/// uses multiple distinct aliases of `rootSlot` (as `PromotableMemOpInterface`
+/// only supports a single slot).
std::optional<MemorySlot> getOpAliasSlot(Operation *op,
const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap);
-/// Returns true if at most one of `op`'s operands aliases `rootSlot`.
-/// This is useful to guard `getOpAliasSlot` calls, as operations reaching
-/// the root through multiple distinct aliases (e.g., memcpy between aliases)
-/// cannot be handled by interfaces expecting a single slot.
-bool isUsingAtMostOneSlotAlias(Operation *op, const MemorySlot &rootSlot,
- const PromotableAliasMap &aliasMap);
-
/// Projects `slotValue` down to the element type of `aliasPtr` by chaining
/// `projectSlotValueToAliasValue` calls along the alias chain. Returns a null
/// value if any projection step fails.
diff --git a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
index 7cd8e96d892de..7050a80ff833a 100644
--- a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
+++ b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
@@ -46,25 +46,16 @@ void mlir::populatePromotableAliasMap(PromotableAliaserInterface aliaser,
std::optional<MemorySlot>
mlir::getOpAliasSlot(Operation *op, const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap) {
- for (Value operand : op->getOperands())
- if (std::optional<MemorySlot> slot =
- getParentSlot(operand, rootSlot, aliasMap))
- return slot;
- return std::nullopt;
-}
-
-bool mlir::isUsingAtMostOneSlotAlias(Operation *op, const MemorySlot &rootSlot,
- const PromotableAliasMap &aliasMap) {
- Value uniqueAliasPtr;
+ std::optional<MemorySlot> uniqueSlot;
for (Value operand : op->getOperands()) {
std::optional<MemorySlot> slot = getParentSlot(operand, rootSlot, aliasMap);
if (!slot)
continue;
- if (uniqueAliasPtr && uniqueAliasPtr != slot->ptr)
- return false;
- uniqueAliasPtr = slot->ptr;
+ if (uniqueSlot && uniqueSlot->ptr != slot->ptr)
+ return std::nullopt;
+ uniqueSlot = slot;
}
- return true;
+ return uniqueSlot;
}
namespace {
@@ -78,41 +69,40 @@ struct ChainStep {
};
} // namespace
-/// Walks back from `aliasPtr` to `rootSlot.ptr` through `aliasMap`,
-/// populating `chainOut` from leaf to root. Returns false if `aliasPtr`
-/// is not a known alias of `rootSlot`.
-static bool buildAliasChain(Value aliasPtr, const MemorySlot &rootSlot,
- const PromotableAliasMap &aliasMap,
- SmallVectorImpl<ChainStep> &chainOut) {
- if (aliasPtr == rootSlot.ptr)
- return true;
+/// Walks from `aliasPtr` back to `rootSlot.ptr` via `aliasMap`. Returns the
+/// leaf-to-root chain, or `nullopt` if `aliasPtr` is not a known alias.
+static std::optional<SmallVector<ChainStep>>
+buildAliasChain(Value aliasPtr, const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap) {
+ SmallVector<ChainStep> chain;
Value current = aliasPtr;
while (current != rootSlot.ptr) {
auto it = aliasMap.find(current);
if (it == aliasMap.end())
- return false;
+ return std::nullopt;
OpOperand *operand = it->second.aliasedSlotPointerOperand;
auto aliaser = cast<PromotableAliaserInterface>(operand->getOwner());
std::optional<MemorySlot> parent =
getParentSlot(operand->get(), rootSlot, aliasMap);
if (!parent)
- return false;
- chainOut.push_back(ChainStep{aliaser, operand, *parent, it->second.slot});
+ return std::nullopt;
+ chain.push_back(ChainStep{aliaser, operand, *parent, it->second.slot});
current = operand->get();
}
- return true;
+ return chain;
}
Value mlir::convertSlotValueToAliasValue(Value slotValue, Value aliasPtr,
const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap,
OpBuilder &builder) {
- SmallVector<ChainStep> chain;
- if (!buildAliasChain(aliasPtr, rootSlot, aliasMap, chain))
+ std::optional<SmallVector<ChainStep>> chain =
+ buildAliasChain(aliasPtr, rootSlot, aliasMap);
+ if (!chain)
return {};
Value current = slotValue;
// Root-to-leaf walk: reverse the leaf-first chain.
- for (ChainStep &step : llvm::reverse(chain)) {
+ for (ChainStep &step : llvm::reverse(*chain)) {
current = step.aliaser.projectSlotValueToAliasValue(
*step.aliasedSlotPointerOperand, step.parentSlot, step.aliasSlot,
current, builder);
@@ -127,9 +117,11 @@ Value mlir::convertAliasValueToSlotValue(Value aliasValue, Value aliasPtr,
const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap,
OpBuilder &builder) {
- SmallVector<ChainStep> chain;
- if (!buildAliasChain(aliasPtr, rootSlot, aliasMap, chain))
+ std::optional<SmallVector<ChainStep>> chainOpt =
+ buildAliasChain(aliasPtr, rootSlot, aliasMap);
+ if (!chainOpt)
return {};
+ SmallVector<ChainStep> &chain = *chainOpt;
// Project `rootReachingDef` down to each step's parent level so the
// per-step projector can use it (needed for partial sub-aliases; full
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
index e4f315a47f13e..408815686738a 100644
--- a/mlir/lib/Transforms/Mem2Reg.cpp
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -409,14 +409,14 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
return failure();
regionsWithDirectUse.insert(user->getParentRegion());
} else if (auto promotable = dyn_cast<PromotableMemOpInterface>(user)) {
- // If the memop reaches the root slot through multiple distinct alias
- // operands, promotion fails. `PromotableMemOpInterface` currently
- // expects a single slot per call. Supporting multiple aliases would
- // require extending the interface.
- if (!isUsingAtMostOneSlotAlias(user, slot, aliasMap))
+ // `getOpAliasSlot` returns `nullopt` if the op uses multiple distinct
+ // aliases. Promotion fails in this case, as `PromotableMemOpInterface`
+ // expects a single slot per call.
+ std::optional<MemorySlot> aliasSlotOpt =
+ getOpAliasSlot(user, slot, aliasMap);
+ if (!aliasSlotOpt)
return failure();
- MemorySlot aliasSlot =
- getOpAliasSlot(user, slot, aliasMap).value_or(slot);
+ MemorySlot aliasSlot = *aliasSlotOpt;
if (!promotable.canUsesBeRemoved(aliasSlot, blockingUses, newBlockingUses,
dataLayout))
return failure();
diff --git a/mlir/test/Transforms/mem2reg.mlir b/mlir/test/Transforms/mem2reg.mlir
index 293ec23c33df4..40006a1c61f51 100644
--- a/mlir/test/Transforms/mem2reg.mlir
+++ b/mlir/test/Transforms/mem2reg.mlir
@@ -294,6 +294,28 @@ func.func @promotable_through_alias_across_regions(%cond: i1, %a: i32) {
// -----
+// Chained aliasers: an identity alias is aliased by a type-changing alias.
+// The alias-map walk must follow both hops and project through each step.
+
+// CHECK-LABEL: func.func @promotable_through_chained_aliases
+// CHECK-SAME: (%[[A:.*]]: f32) -> f32
+// CHECK-NOT: test.multi_slot_alloca
+// CHECK-NOT: test.transparent_alias
+// CHECK-NOT: test.transparent_cast_alias
+// CHECK: %[[I32:.*]] = builtin.unrealized_conversion_cast %[[A]] : f32 to i32
+// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[I32]] : i32 to f32
+// CHECK: return %{{.*}} : f32
+func.func @promotable_through_chained_aliases(%a: f32) -> f32 {
+ %slot = test.multi_slot_alloca : () -> memref<i32>
+ %alias1 = test.transparent_alias %slot : (memref<i32>) -> memref<i32>
+ %alias2 = test.transparent_cast_alias %alias1 : (memref<i32>) -> memref<f32>
+ memref.store %a, %alias2[] : memref<f32>
+ %v = memref.load %alias2[] : memref<f32>
+ return %v : f32
+}
+
+// -----
+
// Dual-alias case: a single aliaser op exposes two simultaneously usable
// aliases of the same parent slot (signless i32) at different signednesses
// (signed and unsigned i32). `getPromotableSlotAliases` populates two
>From 5a2b8ccbadedf868b6fa81e3397991f78baf8615 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Thu, 21 May 2026 04:47:46 -0700
Subject: [PATCH 09/12] revert getOpAliasSlot change and add back
referencesAtMostOneAliasOfSlot
---
.../mlir/Interfaces/MemorySlotInterfaces.h | 18 +++++++++++++----
mlir/lib/Interfaces/MemorySlotInterfaces.cpp | 20 ++++++++++++++-----
mlir/lib/Transforms/Mem2Reg.cpp | 14 ++++++-------
3 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
index a709c17c30e5e..2dd2b2a1a2e6c 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
@@ -69,14 +69,24 @@ void populatePromotableAliasMap(PromotableAliaserInterface aliaser,
const MemorySlot &rootSlot,
PromotableAliasMap &aliasMap);
-/// Finds the memory slot accessed by `op` that aliases `rootSlot.ptr`.
-/// Returns `nullopt` if no operand aliases `rootSlot`, or if the operation
-/// uses multiple distinct aliases of `rootSlot` (as `PromotableMemOpInterface`
-/// only supports a single slot).
+/// Returns a `MemorySlot` for the operand of `op` that aliases `rootSlot.ptr`
+/// (either the root itself or a known entry in `aliasMap`), providing the
+/// alias's element type. Returns `nullopt` if no operand of `op` reaches
+/// `rootSlot`. If `op` reaches `rootSlot` through multiple distinct aliases
+/// (e.g., a memcpy between two aliases of the same root), the first one in
+/// operand order wins; use `referencesAtMostOneAliasOfSlot` to rule this out.
std::optional<MemorySlot> getOpAliasSlot(Operation *op,
const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap);
+/// Returns true if `op`'s operands reach `rootSlot` through at most one
+/// distinct alias pointer (the root itself or a single `aliasMap` entry).
+/// Multiple operands referencing the same alias are allowed. This is used to
+/// guard `PromotableMemOpInterface` calls, which assume a single slot per
+/// operation.
+bool referencesAtMostOneAliasOfSlot(Operation *op, const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap);
+
/// Projects `slotValue` down to the element type of `aliasPtr` by chaining
/// `projectSlotValueToAliasValue` calls along the alias chain. Returns a null
/// value if any projection step fails.
diff --git a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
index 7050a80ff833a..ff9ccd461afec 100644
--- a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
+++ b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
@@ -46,16 +46,26 @@ void mlir::populatePromotableAliasMap(PromotableAliaserInterface aliaser,
std::optional<MemorySlot>
mlir::getOpAliasSlot(Operation *op, const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap) {
- std::optional<MemorySlot> uniqueSlot;
+ for (Value operand : op->getOperands())
+ if (std::optional<MemorySlot> slot =
+ getParentSlot(operand, rootSlot, aliasMap))
+ return slot;
+ return std::nullopt;
+}
+
+bool mlir::referencesAtMostOneAliasOfSlot(Operation *op,
+ const MemorySlot &rootSlot,
+ const PromotableAliasMap &aliasMap) {
+ Value uniqueAliasPtr;
for (Value operand : op->getOperands()) {
std::optional<MemorySlot> slot = getParentSlot(operand, rootSlot, aliasMap);
if (!slot)
continue;
- if (uniqueSlot && uniqueSlot->ptr != slot->ptr)
- return std::nullopt;
- uniqueSlot = slot;
+ if (uniqueAliasPtr && uniqueAliasPtr != slot->ptr)
+ return false;
+ uniqueAliasPtr = slot->ptr;
}
- return uniqueSlot;
+ return true;
}
namespace {
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
index 408815686738a..f4ccb421b0683 100644
--- a/mlir/lib/Transforms/Mem2Reg.cpp
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -409,14 +409,14 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
return failure();
regionsWithDirectUse.insert(user->getParentRegion());
} else if (auto promotable = dyn_cast<PromotableMemOpInterface>(user)) {
- // `getOpAliasSlot` returns `nullopt` if the op uses multiple distinct
- // aliases. Promotion fails in this case, as `PromotableMemOpInterface`
- // expects a single slot per call.
- std::optional<MemorySlot> aliasSlotOpt =
- getOpAliasSlot(user, slot, aliasMap);
- if (!aliasSlotOpt)
+ // If the memop reaches the root slot through multiple distinct alias
+ // operands, promotion fails. `PromotableMemOpInterface` expects a
+ // single slot per call. Supporting multiple aliases would require
+ // extending the interface.
+ if (!referencesAtMostOneAliasOfSlot(user, slot, aliasMap))
return failure();
- MemorySlot aliasSlot = *aliasSlotOpt;
+ MemorySlot aliasSlot =
+ getOpAliasSlot(user, slot, aliasMap).value_or(slot);
if (!promotable.canUsesBeRemoved(aliasSlot, blockingUses, newBlockingUses,
dataLayout))
return failure();
>From ef5e42e88b289dfa6bc7336b9e214a53032fdc83 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Fri, 22 May 2026 04:59:43 -0700
Subject: [PATCH 10/12] iterate on aliases, more tests, reword comments
---
.../mlir/Interfaces/MemorySlotInterfaces.h | 24 +++++---
mlir/lib/Interfaces/MemorySlotInterfaces.cpp | 18 +++---
mlir/lib/Transforms/Mem2Reg.cpp | 35 ++++++-----
mlir/test/Transforms/mem2reg.mlir | 61 +++++++++++++++++++
mlir/test/lib/Dialect/Test/TestOpDefs.cpp | 49 +++++++++++++++
mlir/test/lib/Dialect/Test/TestOps.td | 19 ++++++
6 files changed, 174 insertions(+), 32 deletions(-)
diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
index 2dd2b2a1a2e6c..4ddacd67f99c2 100644
--- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
+++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.h
@@ -57,7 +57,7 @@ struct PromotableSlotAliasInfo {
OpOperand *aliasedSlotPointerOperand;
};
-/// Maps an alias value (a result of a `PromotableAliaserInterface` op)
+/// Maps an alias slot pointer (a result of a `PromotableAliaserInterface` op)
/// reachable from a root slot to its `PromotableSlotAliasInfo`.
using PromotableAliasMap =
llvm::SmallDenseMap<Value, PromotableSlotAliasInfo, 4>;
@@ -87,19 +87,23 @@ std::optional<MemorySlot> getOpAliasSlot(Operation *op,
bool referencesAtMostOneAliasOfSlot(Operation *op, const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap);
-/// Projects `slotValue` down to the element type of `aliasPtr` by chaining
-/// `projectSlotValueToAliasValue` calls along the alias chain. Returns a null
-/// value if any projection step fails.
-Value convertSlotValueToAliasValue(Value slotValue, Value aliasPtr,
+/// Walks the alias chain from `rootSlot` down to `aliasSlot`. Calls
+/// `projectSlotValueToAliasValue` at each step to convert `slotValue`
+/// (initially the root slot's value) to `aliasSlot`'s value. Returns a null
+/// value if any projection fails.
+Value convertSlotValueToAliasValue(Value slotValue, const MemorySlot &aliasSlot,
const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap,
OpBuilder &builder);
-/// Projects `aliasValue` back up to `rootSlot.elemType` by chaining
-/// `projectAliasValueToSlotValue` calls backwards along the alias chain.
-/// `rootReachingDef` provides the current slot value, which is projected
-/// down at each step to supply the required reaching definition.
-Value convertAliasValueToSlotValue(Value aliasValue, Value aliasPtr,
+/// Walks the alias chain from `aliasSlot` back up to `rootSlot`. Calls
+/// `projectAliasValueToSlotValue` at each step to convert `aliasValue`
+/// (initially `aliasSlot`'s value) to the root slot's value.
+/// `rootReachingDef` is the current value of the root slot; it is projected
+/// down to each intermediate slot to provide the reaching definition required
+/// by partial sub-aliases.
+Value convertAliasValueToSlotValue(Value aliasValue,
+ const MemorySlot &aliasSlot,
Value rootReachingDef,
const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap,
diff --git a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
index ff9ccd461afec..57c00aa031d37 100644
--- a/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
+++ b/mlir/lib/Interfaces/MemorySlotInterfaces.cpp
@@ -79,13 +79,13 @@ struct ChainStep {
};
} // namespace
-/// Walks from `aliasPtr` back to `rootSlot.ptr` via `aliasMap`. Returns the
-/// leaf-to-root chain, or `nullopt` if `aliasPtr` is not a known alias.
+/// Walks from `aliasSlot` back to `rootSlot` via `aliasMap`. Returns the
+/// leaf-to-root chain, or `nullopt` if `aliasSlot` is not a known alias.
static std::optional<SmallVector<ChainStep>>
-buildAliasChain(Value aliasPtr, const MemorySlot &rootSlot,
+buildAliasChain(const MemorySlot &aliasSlot, const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap) {
SmallVector<ChainStep> chain;
- Value current = aliasPtr;
+ Value current = aliasSlot.ptr;
while (current != rootSlot.ptr) {
auto it = aliasMap.find(current);
if (it == aliasMap.end())
@@ -102,12 +102,13 @@ buildAliasChain(Value aliasPtr, const MemorySlot &rootSlot,
return chain;
}
-Value mlir::convertSlotValueToAliasValue(Value slotValue, Value aliasPtr,
+Value mlir::convertSlotValueToAliasValue(Value slotValue,
+ const MemorySlot &aliasSlot,
const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap,
OpBuilder &builder) {
std::optional<SmallVector<ChainStep>> chain =
- buildAliasChain(aliasPtr, rootSlot, aliasMap);
+ buildAliasChain(aliasSlot, rootSlot, aliasMap);
if (!chain)
return {};
Value current = slotValue;
@@ -122,13 +123,14 @@ Value mlir::convertSlotValueToAliasValue(Value slotValue, Value aliasPtr,
return current;
}
-Value mlir::convertAliasValueToSlotValue(Value aliasValue, Value aliasPtr,
+Value mlir::convertAliasValueToSlotValue(Value aliasValue,
+ const MemorySlot &aliasSlot,
Value rootReachingDef,
const MemorySlot &rootSlot,
const PromotableAliasMap &aliasMap,
OpBuilder &builder) {
std::optional<SmallVector<ChainStep>> chainOpt =
- buildAliasChain(aliasPtr, rootSlot, aliasMap);
+ buildAliasChain(aliasSlot, rootSlot, aliasMap);
if (!chainOpt)
return {};
SmallVector<ChainStep> &chain = *chainOpt;
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
index f4ccb421b0683..22664b35deea9 100644
--- a/mlir/lib/Transforms/Mem2Reg.cpp
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -534,17 +534,24 @@ MemorySlotPromotionAnalyzer::computeInfo() {
// inherited from a nested region. As a side effect, `definingBlocks` contains
// all regions with at least one store.
//
- // Iterating `info.userToBlockingUses` lets this also pick up stores that
- // reach the slot through chains of aliases (`getPromotableSlotAliases`).
+ // Iterate over direct users of the slot pointer and all alias pointers in
+ // `info.aliasMap`. This assumes `PromotableMemOpInterface` operations storing
+ // to the slot use the slot pointer or its aliases directly. Dialects must
+ // implement `PromotableAliaserInterface` for views/aliasing, rather than
+ // manually walking operand definitions in `PromotableMemOpInterface` to find
+ // indirect slot usages.
DenseMap<Region *, SmallPtrSet<Block *, 16>> definingBlocks;
- for (auto &[region, opsMap] : info.userToBlockingUses)
- for (auto &[user, _blockingUses] : opsMap)
- if (auto storeOp = dyn_cast<PromotableMemOpInterface>(user)) {
- MemorySlot aliasSlot =
- getOpAliasSlot(user, slot, info.aliasMap).value_or(slot);
- if (storeOp.storesTo(aliasSlot))
- definingBlocks[region].insert(user->getBlock());
- }
+ auto collectStoringBlocks = [&](Value ptr, const MemorySlot &ptrSlot) {
+ for (OpOperand &use : ptr.getUses()) {
+ Operation *user = use.getOwner();
+ if (auto storeOp = dyn_cast<PromotableMemOpInterface>(user))
+ if (storeOp.storesTo(ptrSlot))
+ definingBlocks[user->getParentRegion()].insert(user->getBlock());
+ }
+ };
+ collectStoringBlocks(slot.ptr, slot);
+ for (auto &[aliasPtr, aliasInfo] : info.aliasMap)
+ collectStoringBlocks(aliasPtr, aliasInfo.slot);
for (auto &[region, regionInfo] : info.regionsToPromote)
if (regionInfo.hasValueStores)
definingBlocks[region->getParentRegion()].insert(
@@ -590,7 +597,7 @@ Value MemorySlotPromoter::promoteInBlock(Block *block, Value reachingDef) {
// reaching definition (at root elem type) before handing it to
// `getStored`.
reachingDefAtStore = convertSlotValueToAliasValue(
- reachingDef, aliasSlot.ptr, slot, info.aliasMap, builder);
+ reachingDef, aliasSlot, slot, info.aliasMap, builder);
assert(reachingDefAtStore &&
"projectSlotValueToAliasValue contract violation");
}
@@ -603,8 +610,8 @@ Value MemorySlotPromoter::promoteInBlock(Block *block, Value reachingDef) {
// the root slot's elem type, so project `stored` back.
replacedValuesMap[memOp] = stored;
if (aliasSlot.ptr != slot.ptr) {
- stored = convertAliasValueToSlotValue(
- stored, aliasSlot.ptr, reachingDef, slot, info.aliasMap, builder);
+ stored = convertAliasValueToSlotValue(stored, aliasSlot, reachingDef,
+ slot, info.aliasMap, builder);
assert(stored && "projectAliasValueToSlotValue contract violation");
}
reachingDef = stored;
@@ -816,7 +823,7 @@ void MemorySlotPromoter::removeBlockingUses(Region *region) {
// Project the reaching definition to `aliasSlot.elemType` to match
// what `toPromoteMemOp` sees.
reachingDefAtBlockingUse = convertSlotValueToAliasValue(
- reachingDef, aliasSlot.ptr, slot, info.aliasMap, builder);
+ reachingDef, aliasSlot, slot, info.aliasMap, builder);
assert(reachingDefAtBlockingUse &&
"projectSlotValueToAliasValue contract violation");
}
diff --git a/mlir/test/Transforms/mem2reg.mlir b/mlir/test/Transforms/mem2reg.mlir
index 40006a1c61f51..551f913b49313 100644
--- a/mlir/test/Transforms/mem2reg.mlir
+++ b/mlir/test/Transforms/mem2reg.mlir
@@ -294,6 +294,40 @@ func.func @promotable_through_alias_across_regions(%cond: i1, %a: i32) {
// -----
+// Mirror case: the alias is created *inside* `scf.if`, used to store an
+// `f32` value through a type-changing alias, while the parent `i32` slot
+// is read outside. The alias-to-slot projection (`f32` -> `i32`) must run
+// *inside* the region (where the alias is alive) and the resulting `i32`
+// value must be threaded out of `scf.if` via its `setupPromotion`/
+// `finalizePromotion` hooks to feed the parent load.
+
+// CHECK-LABEL: func.func @alias_inside_region_parent_read_outside
+// CHECK-SAME: (%[[COND:.*]]: i1, %[[A:.*]]: f32, %[[INIT:.*]]: i32) -> i32
+// CHECK-NOT: test.multi_slot_alloca
+// CHECK-NOT: test.transparent_cast_alias
+// CHECK-NOT: memref.store
+// CHECK-NOT: memref.load
+// CHECK: %[[RES:.*]] = scf.if %[[COND]] -> (i32)
+// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[A]] : f32 to i32
+// CHECK: scf.yield %[[CAST]] : i32
+// CHECK: } else {
+// CHECK: scf.yield %[[INIT]] : i32
+// CHECK: }
+// CHECK: return %[[RES]] : i32
+func.func @alias_inside_region_parent_read_outside(%cond: i1, %a: f32,
+ %init: i32) -> i32 {
+ %slot = test.multi_slot_alloca : () -> memref<i32>
+ memref.store %init, %slot[] : memref<i32>
+ scf.if %cond {
+ %alias = test.transparent_cast_alias %slot : (memref<i32>) -> memref<f32>
+ memref.store %a, %alias[] : memref<f32>
+ }
+ %v = memref.load %slot[] : memref<i32>
+ return %v : i32
+}
+
+// -----
+
// Chained aliasers: an identity alias is aliased by a type-changing alias.
// The alias-map walk must follow both hops and project through each step.
@@ -340,3 +374,30 @@ func.func @promotable_through_dual_alias(%a: si32) -> ui32 {
%v = memref.load %unsigned[] : memref<ui32>
return %v : ui32
}
+
+// -----
+
+// Partial aliasing: the parent slot stores a `complex<f32>` (a 2-tuple of
+// `f32`), and the alias exposes one component as a `memref<f32>`.
+// The alias-to-slot projection reconstructs the parent value by consuming the
+// current reaching definition (modelled as a 2-input `unrealized_conversion_cast`:
+// new sub-value + parent reaching def). The slot-to-alias projection extracts
+// a component (1-input cast).
+
+// CHECK-LABEL: func.func @promotable_through_partial_alias
+// CHECK-SAME: (%[[X:.*]]: f32) -> f32
+// CHECK-NOT: memref.alloca
+// CHECK-NOT: test.partial_alias
+// CHECK-NOT: memref.store
+// CHECK-NOT: memref.load
+// CHECK: %[[POISON:.*]] = ub.poison : complex<f32>
+// CHECK: %[[NEW:.*]] = builtin.unrealized_conversion_cast %[[X]], %[[POISON]] : f32, complex<f32> to complex<f32>
+// CHECK: %[[R:.*]] = builtin.unrealized_conversion_cast %[[NEW]] : complex<f32> to f32
+// CHECK: return %[[R]] : f32
+func.func @promotable_through_partial_alias(%x: f32) -> f32 {
+ %slot = memref.alloca() : memref<complex<f32>>
+ %alias = test.partial_alias %slot : (memref<complex<f32>>) -> memref<f32>
+ memref.store %x, %alias[] : memref<f32>
+ %v = memref.load %alias[] : memref<f32>
+ return %v : f32
+}
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index 08f96e2cb4cc0..8315bd7cef783 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1901,6 +1901,55 @@ Value TestTransparentDualAlias::projectAliasValueToSlotValue(
.getResult(0);
}
+//===----------------------------------------------------------------------===//
+// TestPartialAlias
+//===----------------------------------------------------------------------===//
+
+void TestPartialAlias::getPromotableSlotAliases(
+ OpOperand &aliasedSlotPointerOperand, const MemorySlot & /*parentSlot*/,
+ SmallVectorImpl<MemorySlot> &newMemorySlots) {
+ if (aliasedSlotPointerOperand.get() != getSource())
+ return;
+ newMemorySlots.push_back(MemorySlot{
+ getResult(), cast<MemRefType>(getResult().getType()).getElementType()});
+}
+
+bool TestPartialAlias::canUsesBeRemoved(
+ const SmallPtrSetImpl<OpOperand *> &blockingUses,
+ SmallVectorImpl<OpOperand *> &newBlockingUses,
+ const DataLayout &dataLayout) {
+ for (OpOperand &use : getResult().getUses())
+ newBlockingUses.push_back(&use);
+ return true;
+}
+
+DeletionKind TestPartialAlias::removeBlockingUses(
+ const SmallPtrSetImpl<OpOperand *> &blockingUses, OpBuilder &builder) {
+ return DeletionKind::Delete;
+}
+
+Value TestPartialAlias::projectSlotValueToAliasValue(
+ OpOperand & /*aliasedSlotPointerOperand*/,
+ const MemorySlot & /*parentSlot*/, const MemorySlot &aliasSlot,
+ Value slotValue, OpBuilder &builder) {
+ // Sub-value extraction: 1-input cast.
+ return UnrealizedConversionCastOp::create(builder, getLoc(),
+ aliasSlot.elemType, slotValue)
+ .getResult(0);
+}
+
+Value TestPartialAlias::projectAliasValueToSlotValue(
+ OpOperand & /*aliasedSlotPointerOperand*/, const MemorySlot &parentSlot,
+ const MemorySlot & /*aliasSlot*/, Value aliasValue, Value reachingDef,
+ OpBuilder &builder) {
+ // Sub-value insertion into the current reaching definition: emit a 2-input
+ // cast taking both the new alias value and the existing parent value.
+ return UnrealizedConversionCastOp::create(builder, getLoc(),
+ parentSlot.elemType,
+ ValueRange{aliasValue, reachingDef})
+ .getResult(0);
+}
+
namespace {
/// Returns test dialect's memref layout for test dialect's tensor encoding when
/// applicable.
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 688b60d48de2f..a1529e3020c82 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -3989,6 +3989,25 @@ def TestTransparentDualAlias : TEST_Op<"transparent_dual_alias",
let assemblyFormat = "$source attr-dict `:` functional-type($source, results)";
}
+// Partial alias of a memref slot: exposes a sub-value of the parent slot.
+// The slot-to-alias projection is a 1-input `unrealized_conversion_cast`
+// (sub-value extraction). The alias-to-slot projection is a 2-input cast
+// taking both the new alias value and the parent slot's reaching definition
+// (sub-value insertion). This exercises mem2reg's `reachingDef` plumbing
+// for partial sub-aliases.
+def TestPartialAlias : TEST_Op<"partial_alias",
+ [DeclareOpInterfaceMethods<PromotableOpInterface,
+ ["canUsesBeRemoved",
+ "removeBlockingUses"]>,
+ DeclareOpInterfaceMethods<PromotableAliaserInterface,
+ ["getPromotableSlotAliases",
+ "projectSlotValueToAliasValue",
+ "projectAliasValueToSlotValue"]>]> {
+ let arguments = (ins AnyMemRef:$source);
+ let results = (outs AnyMemRef:$result);
+ let assemblyFormat = "$source attr-dict `:` functional-type($source, $result)";
+}
+
//===----------------------------------------------------------------------===//
// Test allocation Ops
//===----------------------------------------------------------------------===//
>From 47ff45da62e258084d4d301711c68b17ce6c4fe8 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Fri, 22 May 2026 07:55:24 -0700
Subject: [PATCH 11/12] add tests
---
mlir/test/Dialect/LLVMIR/mem2reg.mlir | 311 ++++++++++++++++++++++++++
1 file changed, 311 insertions(+)
diff --git a/mlir/test/Dialect/LLVMIR/mem2reg.mlir b/mlir/test/Dialect/LLVMIR/mem2reg.mlir
index 3316b4bb955c3..7862789fa0188 100644
--- a/mlir/test/Dialect/LLVMIR/mem2reg.mlir
+++ b/mlir/test/Dialect/LLVMIR/mem2reg.mlir
@@ -1180,3 +1180,314 @@ llvm.func @dead_direct_use(%arg0 : i1) {
}
llvm.return
}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_load_through_bitcast
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_load_through_bitcast(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg0, %1 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_store_through_bitcast
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_store_through_bitcast(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %1 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_store_and_load_through_bitcast
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_store_and_load_through_bitcast(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_through_chained_bitcasts
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_through_chained_bitcasts(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg0, %3 : i32, !llvm.ptr
+ %4 = llvm.load %3 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %4 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func amdgpu_kernelcc @promote_through_addrspacecast
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func amdgpu_kernelcc @promote_through_addrspacecast(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.addrspacecast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr<5>
+ %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_through_zero_gep
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_through_zero_gep(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.getelementptr
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.getelementptr %1[0] : (!llvm.ptr) -> !llvm.ptr, i32
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// Non-zero GEPs are not aliases of the whole slot, so promotion must fail.
+
+// CHECK-LABEL: llvm.func @no_promote_through_nonzero_gep
+llvm.func @no_promote_through_nonzero_gep(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: llvm.alloca
+ %1 = llvm.alloca %0 x !llvm.array<2 x i32> : (i32) -> !llvm.ptr
+ %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<2 x i32>
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_through_launder_invariant_group
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_through_launder_invariant_group(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.intr.launder.invariant.group
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.intr.launder.invariant.group %1 : !llvm.ptr
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_through_strip_invariant_group
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_through_strip_invariant_group(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.intr.strip.invariant.group
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.intr.strip.invariant.group %1 : !llvm.ptr
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_through_alias_across_blocks
+// CHECK-SAME: (%[[COND:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) -> i32
+llvm.func @promote_through_alias_across_blocks(%cond: i1, %arg1: i32, %arg2: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.cond_br %cond, ^bb1, ^bb2
+^bb1:
+ llvm.store %arg1, %2 : i32, !llvm.ptr
+ // CHECK: llvm.br ^[[BB3:.*]](%[[ARG1]] : i32)
+ llvm.br ^bb3
+^bb2:
+ llvm.store %arg2, %1 : i32, !llvm.ptr
+ // CHECK: llvm.br ^[[BB3]](%[[ARG2]] : i32)
+ llvm.br ^bb3
+// CHECK: ^[[BB3]](%[[PHI:.*]]: i32):
+^bb3:
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[PHI]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// Load through a bitcast alias, with a load type that differs from the slot's
+// element type: `createExtractAndCast` must emit a same-size bitcast of the
+// reaching definition.
+
+// CHECK-LABEL: @load_int_from_float_through_bitcast
+llvm.func @load_int_from_float_through_bitcast() -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast %{{.*}} : !llvm.ptr
+ %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : f32
+ // CHECK: %[[CAST:.*]] = llvm.bitcast %[[UNDEF]] : f32 to i32
+ // CHECK: llvm.return %[[CAST]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// Store through a bitcast alias, with a store value type that differs from
+// the slot's element type: `createInsertAndCast` must emit a same-size bitcast
+// of the stored value.
+
+// CHECK-LABEL: @store_float_to_int_through_bitcast
+// CHECK-SAME: %[[ARG:.*]]: f32
+llvm.func @store_float_to_int_through_bitcast(%arg: f32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast %{{.*}} : !llvm.ptr
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg, %2 : f32, !llvm.ptr
+ %3 = llvm.load %1 : !llvm.ptr -> i32
+ // CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG]] : f32 to i32
+ // CHECK: llvm.return %[[CAST]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// Same as above, but the load also goes through a bitcast alias.
+
+// CHECK-LABEL: @store_int_to_vector_through_bitcasts
+// CHECK-SAME: %[[ARG:.*]]: i32
+llvm.func @store_int_to_vector_through_bitcasts(%arg: i32) -> vector<4xi8> {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast %{{.*}} : !llvm.ptr
+ %1 = llvm.alloca %0 x vector<2xi16> : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> vector<4xi8>
+ // CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG]] : i32 to vector<4xi8>
+ // CHECK: llvm.return %[[CAST]]
+ llvm.return %3 : vector<4xi8>
+}
+
+// -----
+
+// Narrowing load through an addrspacecast alias: `createExtractAndCast` emits
+// a truncating sequence.
+
+// CHECK-LABEL: llvm.func amdgpu_kernelcc @load_smaller_through_addrspacecast
+llvm.func amdgpu_kernelcc @load_smaller_through_addrspacecast() -> f32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.addrspacecast
+ %1 = llvm.alloca %0 x f64 : (i32) -> !llvm.ptr<5>
+ %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> f32
+ // CHECK: %[[CAST:.*]] = llvm.bitcast %{{.*}} : f64 to i64
+ // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[CAST]] : i64 to i32
+ // CHECK: %[[RES:.*]] = llvm.bitcast %[[TRUNC]] : i32 to f32
+ // CHECK: llvm.return %[[RES]] : f32
+ llvm.return %3 : f32
+}
+
+// -----
+
+// Partial store through a zero-index GEP alias: `createInsertAndCast` emits
+// the mask-and-combine sequence into the reaching definition projected back
+// to the slot's element type.
+
+// CHECK-LABEL: @partial_store_through_zero_gep
+// CHECK-SAME: %[[ARG:.+]]: vector<1xi8>
+llvm.func @partial_store_through_zero_gep(%arg: vector<1xi8>) {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.getelementptr
+ // CHECK: %[[UNDEF:.+]] = llvm.mlir.undef : f32
+ %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
+ %2 = llvm.getelementptr %1[0] : (!llvm.ptr) -> !llvm.ptr, f32
+ // CHECK: %[[CASTED_DEF:.+]] = llvm.bitcast %[[UNDEF]] : f32 to i32
+ // CHECK: %[[CASTED_ARG:.+]] = llvm.bitcast %[[ARG]] : vector<1xi8> to i8
+ // CHECK: %[[ZEXT:.+]] = llvm.zext %[[CASTED_ARG]] : i8 to i32
+ // CHECK: %[[MASK:.+]] = llvm.mlir.constant(-256 : i32) : i32
+ // CHECK: %[[MASKED:.+]] = llvm.and %[[CASTED_DEF]], %[[MASK]]
+ // CHECK: %[[NEW_DEF:.+]] = llvm.or %[[MASKED]], %[[ZEXT]]
+ // CHECK: %[[CASTED_NEW_DEF:.+]] = llvm.bitcast %[[NEW_DEF]] : i32 to f32
+ llvm.store %arg, %2 : vector<1xi8>, !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// Cross-block partial store through a launder.invariant.group alias: the
+// mask-and-combine value flows through the block-argument added at the join
+// point, and the load on the other side reads it back through
+// `createExtractAndCast`.
+
+// CHECK-LABEL: @cross_block_partial_store_through_alias
+// CHECK-SAME: (%[[COND:.*]]: i1, %[[ARG:.+]]: i16) -> i32
+llvm.func @cross_block_partial_store_through_alias(%cond: i1, %arg: i16) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.intr.launder.invariant.group
+ // CHECK: %[[UNDEF:.+]] = llvm.mlir.undef : i32
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.intr.launder.invariant.group %1 : !llvm.ptr
+ llvm.cond_br %cond, ^bb1, ^bb2
+^bb1:
+ // CHECK: %[[ZEXT:.+]] = llvm.zext %[[ARG]] : i16 to i32
+ // CHECK: %[[MASK:.+]] = llvm.mlir.constant(-65536 : i32) : i32
+ // CHECK: %[[MASKED:.+]] = llvm.and %[[UNDEF]], %[[MASK]]
+ // CHECK: %[[NEW_DEF:.+]] = llvm.or %[[MASKED]], %[[ZEXT]]
+ // CHECK: llvm.br ^[[BB3:.*]](%[[NEW_DEF]] : i32)
+ llvm.store %arg, %2 : i16, !llvm.ptr
+ llvm.br ^bb3
+^bb2:
+ // CHECK: llvm.br ^[[BB3]](%[[UNDEF]] : i32)
+ llvm.br ^bb3
+// CHECK: ^[[BB3]](%[[PHI:.*]]: i32):
+^bb3:
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[PHI]] : i32
+ llvm.return %3 : i32
+}
>From e55af2c684b10d863769320c42385efb4d9d61e3 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Tue, 26 May 2026 06:22:34 -0700
Subject: [PATCH 12/12] remove description of what not to do to focus on what
must be done
---
mlir/lib/Transforms/Mem2Reg.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
index 22664b35deea9..277457f574f55 100644
--- a/mlir/lib/Transforms/Mem2Reg.cpp
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -537,9 +537,7 @@ MemorySlotPromotionAnalyzer::computeInfo() {
// Iterate over direct users of the slot pointer and all alias pointers in
// `info.aliasMap`. This assumes `PromotableMemOpInterface` operations storing
// to the slot use the slot pointer or its aliases directly. Dialects must
- // implement `PromotableAliaserInterface` for views/aliasing, rather than
- // manually walking operand definitions in `PromotableMemOpInterface` to find
- // indirect slot usages.
+ // implement `PromotableAliaserInterface` for views/aliasing.
DenseMap<Region *, SmallPtrSet<Block *, 16>> definingBlocks;
auto collectStoringBlocks = [&](Value ptr, const MemorySlot &ptrSlot) {
for (OpOperand &use : ptr.getUses()) {
More information about the Mlir-commits
mailing list