[llvm] a947419 - [SROA] Improve handling of lifetimes in load-only promotion (#135382)

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 14 02:52:45 PDT 2025


Author: Nikita Popov
Date: 2025-04-14T11:52:42+02:00
New Revision: a9474191e059edbb9b25c769afbe52c7a823f359

URL: https://github.com/llvm/llvm-project/commit/a9474191e059edbb9b25c769afbe52c7a823f359
DIFF: https://github.com/llvm/llvm-project/commit/a9474191e059edbb9b25c769afbe52c7a823f359.diff

LOG: [SROA] Improve handling of lifetimes in load-only promotion (#135382)

The propagateStoredValuesToLoads() transform currently bails out if a
lifetime intrinsic spans the whole alloca while the individual
loads/stores operate on only a smaller part of it, because the slice
and partition sizes then do not match.
    
Fix this by ignoring assume-like slices early, regardless of which range
they cover.
    
I've changed the overall code structure here a bit because I was getting
confused by the different iterators.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/SROA.cpp
    llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
    llvm/test/Transforms/SROA/readonlynocapture.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 4e444d8d4cefc..7d49b63a8e4f6 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -5498,45 +5498,14 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
   // that do not overlap with any before them. The slices are sorted by
   // increasing beginOffset. We don't use AS.partitions(), as it will use a more
   // sophisticated algorithm that takes splittable slices into account.
-  auto PartitionBegin = AS.begin();
-  auto PartitionEnd = PartitionBegin;
-  uint64_t BeginOffset = PartitionBegin->beginOffset();
-  uint64_t EndOffset = PartitionBegin->endOffset();
-  while (PartitionBegin != AS.end()) {
-    bool AllSameAndValid = true;
-    SmallVector<Instruction *> Insts;
-    Type *PartitionType = nullptr;
-    while (PartitionEnd != AS.end() &&
-           (PartitionEnd->beginOffset() < EndOffset ||
-            PartitionEnd->endOffset() <= EndOffset)) {
-      if (AllSameAndValid) {
-        AllSameAndValid &= PartitionEnd->beginOffset() == BeginOffset &&
-                           PartitionEnd->endOffset() == EndOffset;
-        Instruction *User =
-            cast<Instruction>(PartitionEnd->getUse()->getUser());
-        if (auto *LI = dyn_cast<LoadInst>(User)) {
-          Type *UserTy = LI->getType();
-          // LoadAndStorePromoter requires all the types to be the same.
-          if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
-            AllSameAndValid = false;
-          PartitionType = UserTy;
-          Insts.push_back(User);
-        } else if (auto *SI = dyn_cast<StoreInst>(User)) {
-          Type *UserTy = SI->getValueOperand()->getType();
-          if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
-            AllSameAndValid = false;
-          PartitionType = UserTy;
-          Insts.push_back(User);
-        } else if (!isAssumeLikeIntrinsic(User)) {
-          AllSameAndValid = false;
-        }
-      }
-      EndOffset = std::max(EndOffset, PartitionEnd->endOffset());
-      ++PartitionEnd;
-    }
+  LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
+  bool AllSameAndValid = true;
+  Type *PartitionType = nullptr;
+  SmallVector<Instruction *> Insts;
+  uint64_t BeginOffset = 0;
+  uint64_t EndOffset = 0;
 
-    // So long as all the slices start and end offsets matched, update loads to
-    // the values stored in the partition.
+  auto Flush = [&]() {
     if (AllSameAndValid && !Insts.empty()) {
       LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
                         << EndOffset << ")\n");
@@ -5546,14 +5515,56 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
       BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
       Promoter.run(Insts);
     }
+    AllSameAndValid = true;
+    PartitionType = nullptr;
+    Insts.clear();
+  };
 
-    // Step on to the next partition.
-    PartitionBegin = PartitionEnd;
-    if (PartitionBegin == AS.end())
-      break;
-    BeginOffset = PartitionBegin->beginOffset();
-    EndOffset = PartitionBegin->endOffset();
+  for (Slice &S : AS) {
+    auto *User = cast<Instruction>(S.getUse()->getUser());
+    if (isAssumeLikeIntrinsic(User)) {
+      LLVM_DEBUG({
+        dbgs() << "Ignoring slice: ";
+        AS.print(dbgs(), &S);
+      });
+      continue;
+    }
+    if (S.beginOffset() >= EndOffset) {
+      Flush();
+      BeginOffset = S.beginOffset();
+      EndOffset = S.endOffset();
+    } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
+      if (AllSameAndValid) {
+        LLVM_DEBUG({
+          dbgs() << "Slice does not match range [" << BeginOffset << ", "
+                 << EndOffset << ")";
+          AS.print(dbgs(), &S);
+        });
+        AllSameAndValid = false;
+      }
+      EndOffset = std::max(EndOffset, S.endOffset());
+      continue;
+    }
+
+    if (auto *LI = dyn_cast<LoadInst>(User)) {
+      Type *UserTy = LI->getType();
+      // LoadAndStorePromoter requires all the types to be the same.
+      if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
+        AllSameAndValid = false;
+      PartitionType = UserTy;
+      Insts.push_back(User);
+    } else if (auto *SI = dyn_cast<StoreInst>(User)) {
+      Type *UserTy = SI->getValueOperand()->getType();
+      if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
+        AllSameAndValid = false;
+      PartitionType = UserTy;
+      Insts.push_back(User);
+    } else {
+      AllSameAndValid = false;
+    }
   }
+
+  Flush();
   return true;
 }
 

diff  --git a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
index a37f02df46c75..13808b2aa8916 100644
--- a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
+++ b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
@@ -803,8 +803,7 @@ define i64 @do_schedule_instrs_for_dce_after_fixups() {
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @user_of_alloca(ptr [[ADD_PTR]])
-; CHECK-NEXT:    [[LD:%.*]] = load i64, ptr [[C]], align 4
-; CHECK-NEXT:    ret i64 [[LD]]
+; CHECK-NEXT:    ret i64 0
 ;
 entry:
   %c = alloca i64, align 2

diff  --git a/llvm/test/Transforms/SROA/readonlynocapture.ll b/llvm/test/Transforms/SROA/readonlynocapture.ll
index 2ad20fcc51dc5..1cbfe436f9591 100644
--- a/llvm/test/Transforms/SROA/readonlynocapture.ll
+++ b/llvm/test/Transforms/SROA/readonlynocapture.ll
@@ -456,4 +456,47 @@ define i32 @provenance_only_capture() {
   ret i32 %l1
 }
 
+define i32 @simple_with_lifetimes() {
+; CHECK-LABEL: @simple_with_lifetimes(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[A]])
+; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
+; CHECK-NEXT:    call void @callee(ptr [[A]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[A]])
+; CHECK-NEXT:    ret i32 0
+;
+  %a = alloca i32
+  call void @llvm.lifetime.start(i64 4, ptr %a)
+  store i32 0, ptr %a
+  call void @callee(ptr %a)
+  %l1 = load i32, ptr %a
+  call void @llvm.lifetime.end(i64 4, ptr %a)
+  ret i32 %l1
+}
+
+define i32 @twoalloc_with_lifetimes() {
+; CHECK-LABEL: @twoalloc_with_lifetimes(
+; CHECK-NEXT:    [[A:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr [[A]])
+; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
+; CHECK-NEXT:    store i32 1, ptr [[B]], align 4
+; CHECK-NEXT:    call void @callee(ptr [[A]])
+; CHECK-NEXT:    [[R:%.*]] = add i32 0, 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr [[A]])
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %a = alloca {i32, i32}
+  call void @llvm.lifetime.start(i64 8, ptr %a)
+  store i32 0, ptr %a
+  %b = getelementptr i32, ptr %a, i32 1
+  store i32 1, ptr %b
+  call void @callee(ptr %a)
+  %l1 = load i32, ptr %a
+  %l2 = load i32, ptr %b
+  %r = add i32 %l1, %l2
+  call void @llvm.lifetime.end(i64 8, ptr %a)
+  ret i32 %r
+}
+
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)


        


More information about the llvm-commits mailing list