[llvm] [SROA] Rewrite invariant group intrinsics after splitting alloca (PR #107557)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 29 08:55:20 PDT 2024
https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/107557
>From 05284a071a6620cd3c5846b5ff36918da0c2e1a6 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 6 Sep 2024 11:03:59 +0200
Subject: [PATCH 1/3] [SROA] Rewrite invariant group intrinsics after splitting
alloca
A miscompilation issue has been addressed with improved handling.
Fixes: https://github.com/llvm/llvm-project/issues/105537.
---
llvm/lib/Transforms/Scalar/SROA.cpp | 13 ++++++++++++-
.../Transforms/Utils/PromoteMemoryToRegister.cpp | 3 ++-
llvm/test/Transforms/SROA/invariant-group.ll | 14 ++++++++++++++
3 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 92589ab17da313..a962c2dfe9ac5d 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3531,8 +3531,19 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
return true;
}
- if (II.isLaunderOrStripInvariantGroup())
+ if (II.isLaunderOrStripInvariantGroup()) {
+ Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+ Value *New = nullptr;
+ if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
+ New = IRB.CreateLaunderInvariantGroup(AdjustedPtr);
+ else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
+ New = IRB.CreateStripInvariantGroup(AdjustedPtr);
+
+ New->takeName(&II);
+ II.replaceAllUsesWith(New);
+ LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return true;
+ }
assert(II.getArgOperand(1) == OldPtr);
// Lifetime intrinsics are only promotable if they cover the whole alloca.
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 1b7912fdf5e304..95a4178b276177 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -81,7 +81,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
if (!II->isLifetimeStartOrEnd() && !II->isDroppable() &&
- II->getIntrinsicID() != Intrinsic::fake_use)
+ II->getIntrinsicID() != Intrinsic::fake_use &&
+ !II->isLaunderOrStripInvariantGroup())
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
diff --git a/llvm/test/Transforms/SROA/invariant-group.ll b/llvm/test/Transforms/SROA/invariant-group.ll
index 1be6f6e2fc32bb..756912e56f0c8d 100644
--- a/llvm/test/Transforms/SROA/invariant-group.ll
+++ b/llvm/test/Transforms/SROA/invariant-group.ll
@@ -142,6 +142,7 @@ define void @partial_promotion_of_alloca() {
; CHECK-LABEL: @partial_promotion_of_alloca(
; CHECK-NEXT: [[STRUCT_PTR_SROA_2:%.*]] = alloca i32, align 4
; CHECK-NEXT: store volatile i32 0, ptr [[STRUCT_PTR_SROA_2]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[STRUCT_PTR_SROA_2]])
; CHECK-NEXT: [[STRUCT_PTR_SROA_2_0_STRUCT_PTR_SROA_2_4_LOAD_VAL:%.*]] = load volatile i32, ptr [[STRUCT_PTR_SROA_2]], align 4
; CHECK-NEXT: ret void
;
@@ -155,6 +156,19 @@ define void @partial_promotion_of_alloca() {
ret void
}
+define void @memcpy_after_laundering_alloca(ptr %ptr) {
+; CHECK-LABEL: @memcpy_after_laundering_alloca(
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca { i64, i64 }, align 8
+; CHECK-NEXT: [[LAUNDER:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[LAUNDER]], ptr [[PTR:%.*]], i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca { i64, i64 }, align 8
+ %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
+ call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 16, i1 false)
+ ret void
+}
+
declare void @use(ptr)
!0 = !{}
>From 454da7cc7a00ccd2b3f6230a9ca63456662c790b Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Tue, 1 Oct 2024 21:11:10 +0200
Subject: [PATCH 2/3] !fixup update comment
---
llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 95a4178b276177..79c7b018357641 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -492,9 +492,9 @@ static void removeIntrinsicUsers(AllocaInst *AI) {
}
if (!I->getType()->isVoidTy()) {
- // The only users of this bitcast/GEP instruction are lifetime intrinsics.
- // Follow the use/def chain to erase them now instead of leaving it for
- // dead code elimination later.
+ // The only users of this bitcast/GEP instruction are lifetime intrinsics,
+ // fake_use as well as invariant group ones. Follow the use/def chain to
+ // erase them now instead of leaving it for dead code elimination later.
for (Use &UU : llvm::make_early_inc_range(I->uses())) {
Instruction *Inst = cast<Instruction>(UU.getUser());
>From 2a70bbfcd4f2a62ebc55e3998481c50580b84feb Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Tue, 29 Oct 2024 16:54:21 +0100
Subject: [PATCH 3/3] !fixup maybe rewrite memintrinsics when rewriting
invariant group
---
llvm/lib/Transforms/Scalar/SROA.cpp | 83 +++++++++++++++-----
llvm/test/Transforms/SROA/invariant-group.ll | 21 +++++
2 files changed, 83 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index a962c2dfe9ac5d..6f218a95499775 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2656,6 +2656,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
SmallSetVector<PHINode *, 8> &PHIUsers;
SmallSetVector<SelectInst *, 8> &SelectUsers;
+ // Track invariant intrinsic for rewriting its memory intrinsics users.
+ std::optional<Value *> InvariantIntr;
+
// Utility IR builder, whose name prefix is setup for each visited use, and
// the insertion point is set to point to the user.
IRBuilderTy IRB;
@@ -2789,6 +2792,28 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
);
}
+ // Return getNewAllocaSlicePtr, unless the new alloca ptr has passed through
+ // invariant group intrinsic, in which case ensure to return such a pointer.
+ Value *getInvariantGroupPtrOrNewAllocaSlicePtr(IRBuilderTy &IRB,
+ Instruction *I) {
+ // Get the newly-rewritten invariant or the current one, if it has been
+ // rewritten in previous iterations or alloca was not split further.
+ if (isa<IntrinsicInst>(I) &&
+ cast<IntrinsicInst>(I)->isLaunderOrStripInvariantGroup())
+ return InvariantIntr ? *InvariantIntr : I;
+ return getNewAllocaSlicePtr(IRB, I->getType());
+ }
+
+ // Return getPtrToNewAI, unless the new alloca ptr has passed through
+ // invariant group intrinsic, in which case ensure to return such a pointer.
+ Value *getInvariantGroupPtrOrPtrToNewAI(Instruction *I, unsigned AddrSpace,
+ bool IsVolatile) {
+ if (isa<IntrinsicInst>(I) &&
+ cast<IntrinsicInst>(I)->isLaunderOrStripInvariantGroup())
+ return InvariantIntr ? *InvariantIntr : I;
+ return getPtrToNewAI(AddrSpace, IsVolatile);
+ }
+
/// Compute suitable alignment to access this slice of the *new*
/// alloca.
///
@@ -3141,12 +3166,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
AAMDNodes AATags = II.getAAMetadata();
+ Value *NewDestPtr = getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr);
// If the memset has a variable size, it cannot be split, just adjust the
// pointer to the new alloca.
if (!isa<ConstantInt>(II.getLength())) {
assert(!IsSplit);
assert(NewBeginOffset == BeginOffset);
- II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
+ II.setDest(NewDestPtr);
II.setDestAlignment(getSliceAlign());
// In theory we should call migrateDebugInfo here. However, we do not
// emit dbg.assign intrinsics for mem intrinsics storing through non-
@@ -3186,9 +3212,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
Type *SizeTy = II.getLength()->getType();
unsigned Sz = NewEndOffset - NewBeginOffset;
Constant *Size = ConstantInt::get(SizeTy, Sz);
- MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
- getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
- MaybeAlign(getSliceAlign()), II.isVolatile()));
+ MemIntrinsic *New = cast<MemIntrinsic>(
+ IRB.CreateMemSet(NewDestPtr, II.getValue(), Size,
+ MaybeAlign(getSliceAlign()), II.isVolatile()));
if (AATags)
New->setAAMetadata(
AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
@@ -3261,7 +3287,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
V = convertValue(DL, IRB, V, AllocaTy);
}
- Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
+ Value *NewPtr = getInvariantGroupPtrOrPtrToNewAI(
+ OldPtr, II.getDestAddressSpace(), II.isVolatile());
StoreInst *New =
IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
@@ -3372,13 +3399,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
OtherAlign =
commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
- if (EmitMemCpy) {
- // Compute the other pointer, folding as much as possible to produce
- // a single, simple GEP in most cases.
- OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
- OtherPtr->getName() + ".");
+ // Compute the other pointer, folding as much as possible to produce
+ // a single, simple GEP in most cases.
+ OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
+ OtherPtr->getName() + ".");
- Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+ if (EmitMemCpy) {
+ Value *OutPtr = getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr);
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
@@ -3386,14 +3413,14 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
MaybeAlign DestAlign, SrcAlign;
// Note: IsDest is true iff we're copying into the new alloca slice
if (IsDest) {
- DestPtr = OurPtr;
+ DestPtr = OutPtr;
DestAlign = SliceAlign;
SrcPtr = OtherPtr;
SrcAlign = OtherAlign;
} else {
DestPtr = OtherPtr;
DestAlign = OtherAlign;
- SrcPtr = OurPtr;
+ SrcPtr = OutPtr;
SrcAlign = SliceAlign;
}
CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
@@ -3438,8 +3465,6 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
OtherTy = NewAllocaTy;
}
- Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
- OtherPtr->getName() + ".");
MaybeAlign SrcAlign = OtherAlign;
MaybeAlign DstAlign = SliceAlign;
if (!IsDest)
@@ -3449,11 +3474,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
Value *DstPtr;
if (IsDest) {
- DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
- SrcPtr = AdjPtr;
+ DstPtr = getInvariantGroupPtrOrPtrToNewAI(
+ OldPtr, II.getDestAddressSpace(), II.isVolatile());
+ SrcPtr = OtherPtr;
} else {
- DstPtr = AdjPtr;
- SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
+ DstPtr = OtherPtr;
+ SrcPtr = getInvariantGroupPtrOrPtrToNewAI(
+ OldPtr, II.getSourceAddressSpace(), II.isVolatile());
}
Value *Src;
@@ -3539,8 +3566,22 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
New = IRB.CreateStripInvariantGroup(AdjustedPtr);
- New->takeName(&II);
- II.replaceAllUsesWith(New);
+ if (&OldAI == &NewAI) {
+ New->takeName(&II);
+ II.replaceAllUsesWith(New);
+ } else {
+ // If the alloca can be split further, memory intrinsics using the
+ // invariant group may also need to be rewritten. Record the invariant
+ // for when the memory intrinsic is later visited.
+ for (Use &U : II.uses())
+ if (isa<MemIntrinsic>(U.getUser())) {
+ if (!InvariantIntr)
+ InvariantIntr = New;
+ continue;
+ } else {
+ U.set(New);
+ }
+ }
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return true;
}
diff --git a/llvm/test/Transforms/SROA/invariant-group.ll b/llvm/test/Transforms/SROA/invariant-group.ll
index 756912e56f0c8d..2b4e588ba0358e 100644
--- a/llvm/test/Transforms/SROA/invariant-group.ll
+++ b/llvm/test/Transforms/SROA/invariant-group.ll
@@ -169,6 +169,27 @@ define void @memcpy_after_laundering_alloca(ptr %ptr) {
ret void
}
+define void @memcpy_after_laundering_alloca_slices(ptr %ptr) {
+; CHECK-LABEL: @memcpy_after_laundering_alloca_slices(
+; CHECK-NEXT: [[ALLOCA_SROA_0:%.*]] = alloca [16 x i8], align 8
+; CHECK-NEXT: [[ALLOCA_SROA_3:%.*]] = alloca [16 x i8], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA_SROA_0]])
+; CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA_SROA_3]])
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP1]], ptr align 1 [[PTR:%.*]], i64 16, i1 false)
+; CHECK-NEXT: [[ALLOCA_SROA_2_0_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 16
+; CHECK-NEXT: [[ALLOCA_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[ALLOCA_SROA_2_0_PTR_SROA_IDX]], align 1
+; CHECK-NEXT: [[ALLOCA_SROA_3_0_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 24
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 1 [[ALLOCA_SROA_3_0_PTR_SROA_IDX]], i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca { [16 x i8], i64, [16 x i8] }, align 8
+ %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
+ %gep = getelementptr i8, ptr %launder, i64 16
+ store i64 0, ptr %gep
+ call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 40, i1 false)
+ ret void
+}
+
declare void @use(ptr)
!0 = !{}
More information about the llvm-commits
mailing list