[llvm-branch-commits] [mlir] [mlir][LLVMIR] implement PromotableAliaserInterface (PR #199226)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri May 22 08:13:07 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
Author: jeanPerier
<details>
<summary>Changes</summary>
Implement the new PromotableAliaserInterface for the LLVM IR dialect.
Note that since LLVM IR pointers are opaque, the type conversion logic is still done by the load/store as before, but this patch allows loads/stores through bitcast, launder.invariant.group, strip.invariant.group, zero-offset getelementptr, and addrspacecast to be candidates for mem2reg promotion.
Without this patch, mem2reg does not succeed on any of the newly added tests.
---
Full diff: https://github.com/llvm/llvm-project/pull/199226.diff
4 Files Affected:
- (modified) mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td (+4)
- (modified) mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td (+8-1)
- (modified) mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp (+44)
- (modified) mlir/test/Dialect/LLVMIR/mem2reg.mlir (+311)
``````````diff
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index 688bc19cbf18a..52aa16e3b7a14 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -403,6 +403,8 @@ def LLVM_InvariantEndOp : LLVM_ZeroResultIntrOp<"invariant.end", [2],
def LLVM_LaunderInvariantGroupOp
: LLVM_OneResultIntrOp<"launder.invariant.group", [], [0],
[DeclareOpInterfaceMethods<PromotableOpInterface>,
+ DeclareOpInterfaceMethods<PromotableAliaserInterface,
+ ["getPromotableSlotAliases"]>,
SameOperandsAndResultType]> {
let arguments = (ins LLVM_AnyPointer:$ptr);
let results = (outs LLVM_AnyPointer:$res);
@@ -412,6 +414,8 @@ def LLVM_LaunderInvariantGroupOp
def LLVM_StripInvariantGroupOp
: LLVM_OneResultIntrOp<"strip.invariant.group", [], [0],
[DeclareOpInterfaceMethods<PromotableOpInterface>,
+ DeclareOpInterfaceMethods<PromotableAliaserInterface,
+ ["getPromotableSlotAliases"]>,
SameOperandsAndResultType]> {
let arguments = (ins LLVM_AnyPointer:$ptr);
let results = (outs LLVM_AnyPointer:$res);
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index d7c8cf236f0da..b51c689fbdbcd 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -286,6 +286,8 @@ def LLVM_AllocaOp : LLVM_Op<"alloca",
def LLVM_GEPOp : LLVM_Op<"getelementptr", [Pure,
DeclareOpInterfaceMethods<PromotableOpInterface>,
+ DeclareOpInterfaceMethods<PromotableAliaserInterface,
+ ["getPromotableSlotAliases"]>,
DeclareOpInterfaceMethods<SafeMemorySlotAccessOpInterface>,
DeclareOpInterfaceMethods<DestructurableAccessorOpInterface>,
DeclareOpInterfaceMethods<ViewLikeOpInterface>
@@ -632,7 +634,10 @@ class LLVM_DereferenceableCastOp<string mnemonic, string instName, Type type,
}
def LLVM_BitcastOp : LLVM_CastOp<"bitcast", "BitCast", LLVM_AnyNonAggregate,
- LLVM_AnyNonAggregate, [DeclareOpInterfaceMethods<PromotableOpInterface>]> {
+ LLVM_AnyNonAggregate,
+ [DeclareOpInterfaceMethods<PromotableOpInterface>,
+ DeclareOpInterfaceMethods<PromotableAliaserInterface,
+ ["getPromotableSlotAliases"]>]> {
let hasFolder = 1;
let hasVerifier = 1;
}
@@ -640,6 +645,8 @@ def LLVM_AddrSpaceCastOp : LLVM_CastOp<"addrspacecast", "AddrSpaceCast",
LLVM_ScalarOrVectorOf<LLVM_AnyPointer>,
LLVM_ScalarOrVectorOf<LLVM_AnyPointer>,
[DeclareOpInterfaceMethods<PromotableOpInterface>,
+ DeclareOpInterfaceMethods<PromotableAliaserInterface,
+ ["getPromotableSlotAliases"]>,
DeclareOpInterfaceMethods<ViewLikeOpInterface>]> {
let hasFolder = 1;
}
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 66d6a592a93df..c79c1199df770 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -536,6 +536,18 @@ DeletionKind LLVM::BitcastOp::removeBlockingUses(
return DeletionKind::Delete;
}
+void LLVM::BitcastOp::getPromotableSlotAliases(
+ OpOperand &aliasedSlotPointerOperand, const MemorySlot &parentSlot,
+ SmallVectorImpl<MemorySlot> &newMemorySlots) {
+ // Only pointer-to-pointer bitcasts alias a memory slot. With opaque
+ // pointers, the alias slot keeps the parent's element type since the
+ // pointer type itself does not carry it.
+ if (!isa<LLVM::LLVMPointerType>(getResult().getType()) ||
+ !isa<LLVM::LLVMPointerType>(getArg().getType()))
+ return;
+ newMemorySlots.push_back(MemorySlot{getResult(), parentSlot.elemType});
+}
+
bool LLVM::AddrSpaceCastOp::canUsesBeRemoved(
const SmallPtrSetImpl<OpOperand *> &blockingUses,
SmallVectorImpl<OpOperand *> &newBlockingUses,
@@ -548,6 +560,15 @@ DeletionKind LLVM::AddrSpaceCastOp::removeBlockingUses(
return DeletionKind::Delete;
}
+void LLVM::AddrSpaceCastOp::getPromotableSlotAliases(
+ OpOperand &aliasedSlotPointerOperand, const MemorySlot &parentSlot,
+ SmallVectorImpl<MemorySlot> &newMemorySlots) {
+ // Only the scalar pointer form aliases a memory slot.
+ if (!isa<LLVM::LLVMPointerType>(getResult().getType()))
+ return;
+ newMemorySlots.push_back(MemorySlot{getResult(), parentSlot.elemType});
+}
+
bool LLVM::LifetimeStartOp::canUsesBeRemoved(
const SmallPtrSetImpl<OpOperand *> &blockingUses,
SmallVectorImpl<OpOperand *> &newBlockingUses,
@@ -608,6 +629,12 @@ DeletionKind LLVM::LaunderInvariantGroupOp::removeBlockingUses(
return DeletionKind::Delete;
}
+void LLVM::LaunderInvariantGroupOp::getPromotableSlotAliases(
+ OpOperand &aliasedSlotPointerOperand, const MemorySlot &parentSlot,
+ SmallVectorImpl<MemorySlot> &newMemorySlots) {
+ newMemorySlots.push_back(MemorySlot{getResult(), parentSlot.elemType});
+}
+
bool LLVM::StripInvariantGroupOp::canUsesBeRemoved(
const SmallPtrSetImpl<OpOperand *> &blockingUses,
SmallVectorImpl<OpOperand *> &newBlockingUses,
@@ -620,6 +647,12 @@ DeletionKind LLVM::StripInvariantGroupOp::removeBlockingUses(
return DeletionKind::Delete;
}
+void LLVM::StripInvariantGroupOp::getPromotableSlotAliases(
+ OpOperand &aliasedSlotPointerOperand, const MemorySlot &parentSlot,
+ SmallVectorImpl<MemorySlot> &newMemorySlots) {
+ newMemorySlots.push_back(MemorySlot{getResult(), parentSlot.elemType});
+}
+
bool LLVM::DbgDeclareOp::canUsesBeRemoved(
const SmallPtrSetImpl<OpOperand *> &blockingUses,
SmallVectorImpl<OpOperand *> &newBlockingUses,
@@ -694,6 +727,17 @@ DeletionKind LLVM::GEPOp::removeBlockingUses(
return DeletionKind::Delete;
}
+void LLVM::GEPOp::getPromotableSlotAliases(
+ OpOperand &aliasedSlotPointerOperand, const MemorySlot &parentSlot,
+ SmallVectorImpl<MemorySlot> &newMemorySlots) {
+ // Only zero-index GEPs are no-op aliases of the slot pointer; non-zero
+ // indices step into the slot and cannot be projected back generically.
+ if (!hasAllZeroIndices(*this) ||
+ !isa<LLVM::LLVMPointerType>(getResult().getType()))
+ return;
+ newMemorySlots.push_back(MemorySlot{getResult(), parentSlot.elemType});
+}
+
/// Returns the amount of bytes the provided GEP elements will offset the
/// pointer by. Returns nullopt if no constant offset could be computed.
static std::optional<uint64_t> gepToByteOffset(const DataLayout &dataLayout,
diff --git a/mlir/test/Dialect/LLVMIR/mem2reg.mlir b/mlir/test/Dialect/LLVMIR/mem2reg.mlir
index 3316b4bb955c3..7862789fa0188 100644
--- a/mlir/test/Dialect/LLVMIR/mem2reg.mlir
+++ b/mlir/test/Dialect/LLVMIR/mem2reg.mlir
@@ -1180,3 +1180,314 @@ llvm.func @dead_direct_use(%arg0 : i1) {
}
llvm.return
}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_load_through_bitcast
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_load_through_bitcast(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg0, %1 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_store_through_bitcast
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_store_through_bitcast(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %1 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_store_and_load_through_bitcast
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_store_and_load_through_bitcast(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_through_chained_bitcasts
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_through_chained_bitcasts(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ %3 = llvm.bitcast %2 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg0, %3 : i32, !llvm.ptr
+ %4 = llvm.load %3 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %4 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func amdgpu_kernelcc @promote_through_addrspacecast
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func amdgpu_kernelcc @promote_through_addrspacecast(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.addrspacecast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr<5>
+ %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_through_zero_gep
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_through_zero_gep(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.getelementptr
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.getelementptr %1[0] : (!llvm.ptr) -> !llvm.ptr, i32
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// Non-zero GEPs are not aliases of the whole slot, so promotion must fail.
+
+// CHECK-LABEL: llvm.func @no_promote_through_nonzero_gep
+llvm.func @no_promote_through_nonzero_gep(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK: llvm.alloca
+ %1 = llvm.alloca %0 x !llvm.array<2 x i32> : (i32) -> !llvm.ptr
+ %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<2 x i32>
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_through_launder_invariant_group
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_through_launder_invariant_group(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.intr.launder.invariant.group
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.intr.launder.invariant.group %1 : !llvm.ptr
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_through_strip_invariant_group
+// CHECK-SAME: (%[[ARG0:.*]]: i32) -> i32
+llvm.func @promote_through_strip_invariant_group(%arg0: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.intr.strip.invariant.group
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.intr.strip.invariant.group %1 : !llvm.ptr
+ llvm.store %arg0, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[ARG0]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// CHECK-LABEL: llvm.func @promote_through_alias_across_blocks
+// CHECK-SAME: (%[[COND:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) -> i32
+llvm.func @promote_through_alias_across_blocks(%cond: i1, %arg1: i32, %arg2: i32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.cond_br %cond, ^bb1, ^bb2
+^bb1:
+ llvm.store %arg1, %2 : i32, !llvm.ptr
+ // CHECK: llvm.br ^[[BB3:.*]](%[[ARG1]] : i32)
+ llvm.br ^bb3
+^bb2:
+ llvm.store %arg2, %1 : i32, !llvm.ptr
+ // CHECK: llvm.br ^[[BB3]](%[[ARG2]] : i32)
+ llvm.br ^bb3
+// CHECK: ^[[BB3]](%[[PHI:.*]]: i32):
+^bb3:
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[PHI]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// Load through a bitcast alias, with a load type that differs from the slot's
+// element type: `createExtractAndCast` must emit a same-size bitcast of the
+// reaching definition.
+
+// CHECK-LABEL: @load_int_from_float_through_bitcast
+llvm.func @load_int_from_float_through_bitcast() -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast %{{.*}} : !llvm.ptr
+ %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : f32
+ // CHECK: %[[CAST:.*]] = llvm.bitcast %[[UNDEF]] : f32 to i32
+ // CHECK: llvm.return %[[CAST]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// Store through a bitcast alias, with a store value type that differs from
+// the slot's element type: `createInsertAndCast` must emit a same-size bitcast
+// of the stored value.
+
+// CHECK-LABEL: @store_float_to_int_through_bitcast
+// CHECK-SAME: %[[ARG:.*]]: f32
+llvm.func @store_float_to_int_through_bitcast(%arg: f32) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast %{{.*}} : !llvm.ptr
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg, %2 : f32, !llvm.ptr
+ %3 = llvm.load %1 : !llvm.ptr -> i32
+ // CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG]] : f32 to i32
+ // CHECK: llvm.return %[[CAST]] : i32
+ llvm.return %3 : i32
+}
+
+// -----
+
+// Same as above, but the load also goes through a bitcast alias.
+
+// CHECK-LABEL: @store_int_to_vector_through_bitcasts
+// CHECK-SAME: %[[ARG:.*]]: i32
+llvm.func @store_int_to_vector_through_bitcasts(%arg: i32) -> vector<4xi8> {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.bitcast %{{.*}} : !llvm.ptr
+ %1 = llvm.alloca %0 x vector<2xi16> : (i32) -> !llvm.ptr
+ %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr
+ llvm.store %arg, %2 : i32, !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> vector<4xi8>
+ // CHECK: %[[CAST:.*]] = llvm.bitcast %[[ARG]] : i32 to vector<4xi8>
+ // CHECK: llvm.return %[[CAST]]
+ llvm.return %3 : vector<4xi8>
+}
+
+// -----
+
+// Narrowing load through an addrspacecast alias: `createExtractAndCast` emits
+// a truncating sequence.
+
+// CHECK-LABEL: llvm.func amdgpu_kernelcc @load_smaller_through_addrspacecast
+llvm.func amdgpu_kernelcc @load_smaller_through_addrspacecast() -> f32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.addrspacecast
+ %1 = llvm.alloca %0 x f64 : (i32) -> !llvm.ptr<5>
+ %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %3 = llvm.load %2 : !llvm.ptr -> f32
+ // CHECK: %[[CAST:.*]] = llvm.bitcast %{{.*}} : f64 to i64
+ // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[CAST]] : i64 to i32
+ // CHECK: %[[RES:.*]] = llvm.bitcast %[[TRUNC]] : i32 to f32
+ // CHECK: llvm.return %[[RES]] : f32
+ llvm.return %3 : f32
+}
+
+// -----
+
+// Partial store through a zero-index GEP alias: `createInsertAndCast` emits
+// the mask-and-combine sequence into the reaching definition projected back
+// to the slot's element type.
+
+// CHECK-LABEL: @partial_store_through_zero_gep
+// CHECK-SAME: %[[ARG:.+]]: vector<1xi8>
+llvm.func @partial_store_through_zero_gep(%arg: vector<1xi8>) {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.getelementptr
+ // CHECK: %[[UNDEF:.+]] = llvm.mlir.undef : f32
+ %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
+ %2 = llvm.getelementptr %1[0] : (!llvm.ptr) -> !llvm.ptr, f32
+ // CHECK: %[[CASTED_DEF:.+]] = llvm.bitcast %[[UNDEF]] : f32 to i32
+ // CHECK: %[[CASTED_ARG:.+]] = llvm.bitcast %[[ARG]] : vector<1xi8> to i8
+ // CHECK: %[[ZEXT:.+]] = llvm.zext %[[CASTED_ARG]] : i8 to i32
+ // CHECK: %[[MASK:.+]] = llvm.mlir.constant(-256 : i32) : i32
+ // CHECK: %[[MASKED:.+]] = llvm.and %[[CASTED_DEF]], %[[MASK]]
+ // CHECK: %[[NEW_DEF:.+]] = llvm.or %[[MASKED]], %[[ZEXT]]
+ // CHECK: %[[CASTED_NEW_DEF:.+]] = llvm.bitcast %[[NEW_DEF]] : i32 to f32
+ llvm.store %arg, %2 : vector<1xi8>, !llvm.ptr
+ llvm.return
+}
+
+// -----
+
+// Cross-block partial store through a launder.invariant.group alias: the
+// mask-and-combine value flows through the block-argument added at the join
+// point, and the load on the other side reads it back through
+// `createExtractAndCast`.
+
+// CHECK-LABEL: @cross_block_partial_store_through_alias
+// CHECK-SAME: (%[[COND:.*]]: i1, %[[ARG:.+]]: i16) -> i32
+llvm.func @cross_block_partial_store_through_alias(%cond: i1, %arg: i16) -> i32 {
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ // CHECK-NOT: llvm.alloca
+ // CHECK-NOT: llvm.intr.launder.invariant.group
+ // CHECK: %[[UNDEF:.+]] = llvm.mlir.undef : i32
+ %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+ %2 = llvm.intr.launder.invariant.group %1 : !llvm.ptr
+ llvm.cond_br %cond, ^bb1, ^bb2
+^bb1:
+ // CHECK: %[[ZEXT:.+]] = llvm.zext %[[ARG]] : i16 to i32
+ // CHECK: %[[MASK:.+]] = llvm.mlir.constant(-65536 : i32) : i32
+ // CHECK: %[[MASKED:.+]] = llvm.and %[[UNDEF]], %[[MASK]]
+ // CHECK: %[[NEW_DEF:.+]] = llvm.or %[[MASKED]], %[[ZEXT]]
+ // CHECK: llvm.br ^[[BB3:.*]](%[[NEW_DEF]] : i32)
+ llvm.store %arg, %2 : i16, !llvm.ptr
+ llvm.br ^bb3
+^bb2:
+ // CHECK: llvm.br ^[[BB3]](%[[UNDEF]] : i32)
+ llvm.br ^bb3
+// CHECK: ^[[BB3]](%[[PHI:.*]]: i32):
+^bb3:
+ %3 = llvm.load %2 : !llvm.ptr -> i32
+ // CHECK: llvm.return %[[PHI]] : i32
+ llvm.return %3 : i32
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/199226
More information about the llvm-branch-commits
mailing list