[llvm] [SROA] Rewrite invariant group intrinsics after splitting alloca (PR #107557)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 29 08:55:20 PDT 2024


https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/107557

From 05284a071a6620cd3c5846b5ff36918da0c2e1a6 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 6 Sep 2024 11:03:59 +0200
Subject: [PATCH 1/3] [SROA] Rewrite invariant group intrinsics after splitting
 alloca

SROA currently drops llvm.launder.invariant.group / llvm.strip.invariant.group
intrinsics while rewriting the uses of an alloca, losing the invariant group
barrier and miscompiling code that relies on it. Recreate the intrinsic on the
new alloca pointer instead, and teach isAllocaPromotable to accept such users.
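
A reduced sketch of the problematic pattern, taken from the
memcpy_after_laundering_alloca test added below:

  define void @memcpy_after_laundering_alloca(ptr %ptr) {
    %alloca = alloca { i64, i64 }, align 8
    %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
    call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 16, i1 false)
    ret void
  }

Previously the launder call disappeared after SROA; with this change it is
recreated on the rewritten alloca pointer, so the memcpy keeps storing through
the laundered pointer.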

Fixes: https://github.com/llvm/llvm-project/issues/105537.
---
 llvm/lib/Transforms/Scalar/SROA.cpp                | 13 ++++++++++++-
 .../Transforms/Utils/PromoteMemoryToRegister.cpp   |  3 ++-
 llvm/test/Transforms/SROA/invariant-group.ll       | 14 ++++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 92589ab17da313..a962c2dfe9ac5d 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3531,8 +3531,19 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       return true;
     }
 
-    if (II.isLaunderOrStripInvariantGroup())
+    if (II.isLaunderOrStripInvariantGroup()) {
+      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+      Value *New = nullptr;
+      if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
+        New = IRB.CreateLaunderInvariantGroup(AdjustedPtr);
+      else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
+        New = IRB.CreateStripInvariantGroup(AdjustedPtr);
+
+      New->takeName(&II);
+      II.replaceAllUsesWith(New);
+      LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
       return true;
+    }
 
     assert(II.getArgOperand(1) == OldPtr);
     // Lifetime intrinsics are only promotable if they cover the whole alloca.
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 1b7912fdf5e304..95a4178b276177 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -81,7 +81,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
         return false;
     } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
       if (!II->isLifetimeStartOrEnd() && !II->isDroppable() &&
-          II->getIntrinsicID() != Intrinsic::fake_use)
+          II->getIntrinsicID() != Intrinsic::fake_use &&
+          !II->isLaunderOrStripInvariantGroup())
         return false;
     } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
diff --git a/llvm/test/Transforms/SROA/invariant-group.ll b/llvm/test/Transforms/SROA/invariant-group.ll
index 1be6f6e2fc32bb..756912e56f0c8d 100644
--- a/llvm/test/Transforms/SROA/invariant-group.ll
+++ b/llvm/test/Transforms/SROA/invariant-group.ll
@@ -142,6 +142,7 @@ define void @partial_promotion_of_alloca() {
 ; CHECK-LABEL: @partial_promotion_of_alloca(
 ; CHECK-NEXT:    [[STRUCT_PTR_SROA_2:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store volatile i32 0, ptr [[STRUCT_PTR_SROA_2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[STRUCT_PTR_SROA_2]])
 ; CHECK-NEXT:    [[STRUCT_PTR_SROA_2_0_STRUCT_PTR_SROA_2_4_LOAD_VAL:%.*]] = load volatile i32, ptr [[STRUCT_PTR_SROA_2]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -155,6 +156,19 @@ define void @partial_promotion_of_alloca() {
   ret void
 }
 
+define void @memcpy_after_laundering_alloca(ptr %ptr) {
+; CHECK-LABEL: @memcpy_after_laundering_alloca(
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca { i64, i64 }, align 8
+; CHECK-NEXT:    [[LAUNDER:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[LAUNDER]], ptr [[PTR:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca { i64, i64 }, align 8
+  %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
+  call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 16, i1 false)
+  ret void
+}
+
 declare void @use(ptr)
 
 !0 = !{}

From 454da7cc7a00ccd2b3f6230a9ca63456662c790b Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Tue, 1 Oct 2024 21:11:10 +0200
Subject: [PATCH 2/3] !fixup update comment

---
 llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 95a4178b276177..79c7b018357641 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -492,9 +492,9 @@ static void removeIntrinsicUsers(AllocaInst *AI) {
     }
 
     if (!I->getType()->isVoidTy()) {
-      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
-      // Follow the use/def chain to erase them now instead of leaving it for
-      // dead code elimination later.
+      // The only users of this bitcast/GEP instruction are lifetime, fake_use,
+      // or invariant group intrinsics. Follow the use/def chain to erase them
+      // now instead of leaving them for dead code elimination later.
       for (Use &UU : llvm::make_early_inc_range(I->uses())) {
         Instruction *Inst = cast<Instruction>(UU.getUser());
 

From 2a70bbfcd4f2a62ebc55e3998481c50580b84feb Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Tue, 29 Oct 2024 16:54:21 +0100
Subject: [PATCH 3/3] !fixup maybe rewrite memintrinsics when rewriting
 invariant group
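
When the alloca gets split into multiple slices, rewriting the intrinsic alone
is not enough: memory intrinsics that go through the laundered pointer must be
rewritten against each new slice as well. A sketch of the case handled here,
taken from the memcpy_after_laundering_alloca_slices test added below:

  define void @memcpy_after_laundering_alloca_slices(ptr %ptr) {
    %alloca = alloca { [16 x i8], i64, [16 x i8] }, align 8
    %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
    %gep = getelementptr i8, ptr %launder, i64 16
    store i64 0, ptr %gep
    call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 40, i1 false)
    ret void
  }

After SROA, each surviving [16 x i8] slice gets its own
llvm.launder.invariant.group call, and the memcpy is split so that every piece
copies through the corresponding laundered slice pointer (see the CHECK lines
in the updated test).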

---
 llvm/lib/Transforms/Scalar/SROA.cpp          | 83 +++++++++++++++-----
 llvm/test/Transforms/SROA/invariant-group.ll | 21 +++++
 2 files changed, 83 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index a962c2dfe9ac5d..6f218a95499775 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2656,6 +2656,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
   SmallSetVector<PHINode *, 8> &PHIUsers;
   SmallSetVector<SelectInst *, 8> &SelectUsers;
 
+  // Track the invariant intrinsic to rewrite its memory intrinsic users.
+  std::optional<Value *> InvariantIntr;
+
   // Utility IR builder, whose name prefix is setup for each visited use, and
   // the insertion point is set to point to the user.
   IRBuilderTy IRB;
@@ -2789,6 +2792,28 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     );
   }
 
+  // Return the new alloca slice pointer, unless the pointer has passed through
+  // an invariant group intrinsic, in which case return the laundered pointer.
+  Value *getInvariantGroupPtrOrNewAllocaSlicePtr(IRBuilderTy &IRB,
+                                                 Instruction *I) {
+    // Return the newly-rewritten invariant intrinsic, or the current one if it
+    // was rewritten in a previous iteration or the alloca was not split further.
+    if (isa<IntrinsicInst>(I) &&
+        cast<IntrinsicInst>(I)->isLaunderOrStripInvariantGroup())
+      return InvariantIntr ? *InvariantIntr : I;
+    return getNewAllocaSlicePtr(IRB, I->getType());
+  }
+
+  // Return the pointer to the new alloca, unless it has passed through an
+  // invariant group intrinsic, in which case return the laundered pointer.
+  Value *getInvariantGroupPtrOrPtrToNewAI(Instruction *I, unsigned AddrSpace,
+                                          bool IsVolatile) {
+    if (isa<IntrinsicInst>(I) &&
+        cast<IntrinsicInst>(I)->isLaunderOrStripInvariantGroup())
+      return InvariantIntr ? *InvariantIntr : I;
+    return getPtrToNewAI(AddrSpace, IsVolatile);
+  }
+
   /// Compute suitable alignment to access this slice of the *new*
   /// alloca.
   ///
@@ -3141,12 +3166,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
 
     AAMDNodes AATags = II.getAAMetadata();
 
+    Value *NewDestPtr = getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr);
     // If the memset has a variable size, it cannot be split, just adjust the
     // pointer to the new alloca.
     if (!isa<ConstantInt>(II.getLength())) {
       assert(!IsSplit);
       assert(NewBeginOffset == BeginOffset);
-      II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
+      II.setDest(NewDestPtr);
       II.setDestAlignment(getSliceAlign());
       // In theory we should call migrateDebugInfo here. However, we do not
       // emit dbg.assign intrinsics for mem intrinsics storing through non-
@@ -3186,9 +3212,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       Type *SizeTy = II.getLength()->getType();
       unsigned Sz = NewEndOffset - NewBeginOffset;
       Constant *Size = ConstantInt::get(SizeTy, Sz);
-      MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
-          getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
-          MaybeAlign(getSliceAlign()), II.isVolatile()));
+      MemIntrinsic *New = cast<MemIntrinsic>(
+          IRB.CreateMemSet(NewDestPtr, II.getValue(), Size,
+                           MaybeAlign(getSliceAlign()), II.isVolatile()));
       if (AATags)
         New->setAAMetadata(
             AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
@@ -3261,7 +3287,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       V = convertValue(DL, IRB, V, AllocaTy);
     }
 
-    Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
+    Value *NewPtr = getInvariantGroupPtrOrPtrToNewAI(
+        OldPtr, II.getDestAddressSpace(), II.isVolatile());
     StoreInst *New =
         IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
     New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
@@ -3372,13 +3399,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     OtherAlign =
         commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
 
-    if (EmitMemCpy) {
-      // Compute the other pointer, folding as much as possible to produce
-      // a single, simple GEP in most cases.
-      OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
-                                OtherPtr->getName() + ".");
+    // Compute the other pointer, folding as much as possible to produce
+    // a single, simple GEP in most cases.
+    OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
+                              OtherPtr->getName() + ".");
 
-      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+    if (EmitMemCpy) {
+      Value *OutPtr = getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr);
       Type *SizeTy = II.getLength()->getType();
       Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
 
@@ -3386,14 +3413,14 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       MaybeAlign DestAlign, SrcAlign;
       // Note: IsDest is true iff we're copying into the new alloca slice
       if (IsDest) {
-        DestPtr = OurPtr;
+        DestPtr = OutPtr;
         DestAlign = SliceAlign;
         SrcPtr = OtherPtr;
         SrcAlign = OtherAlign;
       } else {
         DestPtr = OtherPtr;
         DestAlign = OtherAlign;
-        SrcPtr = OurPtr;
+        SrcPtr = OutPtr;
         SrcAlign = SliceAlign;
       }
       CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
@@ -3438,8 +3465,6 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       OtherTy = NewAllocaTy;
     }
 
-    Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
-                                   OtherPtr->getName() + ".");
     MaybeAlign SrcAlign = OtherAlign;
     MaybeAlign DstAlign = SliceAlign;
     if (!IsDest)
@@ -3449,11 +3474,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     Value *DstPtr;
 
     if (IsDest) {
-      DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
-      SrcPtr = AdjPtr;
+      DstPtr = getInvariantGroupPtrOrPtrToNewAI(
+          OldPtr, II.getDestAddressSpace(), II.isVolatile());
+      SrcPtr = OtherPtr;
     } else {
-      DstPtr = AdjPtr;
-      SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
+      DstPtr = OtherPtr;
+      SrcPtr = getInvariantGroupPtrOrPtrToNewAI(
+          OldPtr, II.getSourceAddressSpace(), II.isVolatile());
     }
 
     Value *Src;
@@ -3539,8 +3566,22 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
         New = IRB.CreateStripInvariantGroup(AdjustedPtr);
 
-      New->takeName(&II);
-      II.replaceAllUsesWith(New);
+      if (&OldAI == &NewAI) {
+        New->takeName(&II);
+        II.replaceAllUsesWith(New);
+      } else {
+        // If the alloca is being split, memory intrinsics using the laundered
+        // pointer may also need to be rewritten. Record the new invariant
+        // group intrinsic for when the memory intrinsic is later visited.
+        for (Use &U : llvm::make_early_inc_range(II.uses())) {
+          if (isa<MemIntrinsic>(U.getUser())) {
+            if (!InvariantIntr)
+              InvariantIntr = New;
+            continue;
+          }
+          U.set(New);
+        }
+      }
       LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
       return true;
     }
diff --git a/llvm/test/Transforms/SROA/invariant-group.ll b/llvm/test/Transforms/SROA/invariant-group.ll
index 756912e56f0c8d..2b4e588ba0358e 100644
--- a/llvm/test/Transforms/SROA/invariant-group.ll
+++ b/llvm/test/Transforms/SROA/invariant-group.ll
@@ -169,6 +169,27 @@ define void @memcpy_after_laundering_alloca(ptr %ptr) {
   ret void
 }
 
+define void @memcpy_after_laundering_alloca_slices(ptr %ptr) {
+; CHECK-LABEL: @memcpy_after_laundering_alloca_slices(
+; CHECK-NEXT:    [[ALLOCA_SROA_0:%.*]] = alloca [16 x i8], align 8
+; CHECK-NEXT:    [[ALLOCA_SROA_3:%.*]] = alloca [16 x i8], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA_SROA_0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA_SROA_3]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP1]], ptr align 1 [[PTR:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    [[ALLOCA_SROA_2_0_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 16
+; CHECK-NEXT:    [[ALLOCA_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[ALLOCA_SROA_2_0_PTR_SROA_IDX]], align 1
+; CHECK-NEXT:    [[ALLOCA_SROA_3_0_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 24
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 1 [[ALLOCA_SROA_3_0_PTR_SROA_IDX]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca { [16 x i8], i64, [16 x i8] }, align 8
+  %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
+  %gep = getelementptr i8, ptr %launder, i64 16
+  store i64 0, ptr %gep
+  call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 40, i1 false)
+  ret void
+}
+
 declare void @use(ptr)
 
 !0 = !{}


