[llvm] [SROA] Improve handling of lifetimes in load-only promotion (PR #135382)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 11 07:55:23 PDT 2025


https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/135382

>From a170eb523829fbf56d92112d70ef8e35512eb5d4 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Fri, 11 Apr 2025 15:28:08 +0200
Subject: [PATCH 1/3] Add lifetime tests

---
 .../test/Transforms/SROA/readonlynocapture.ll | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/llvm/test/Transforms/SROA/readonlynocapture.ll b/llvm/test/Transforms/SROA/readonlynocapture.ll
index 2ad20fcc51dc5..350da3eb3a005 100644
--- a/llvm/test/Transforms/SROA/readonlynocapture.ll
+++ b/llvm/test/Transforms/SROA/readonlynocapture.ll
@@ -456,4 +456,49 @@ define i32 @provenance_only_capture() {
   ret i32 %l1
 }
 
+define i32 @simple_with_lifetimes() {
+; CHECK-LABEL: @simple_with_lifetimes(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[A]])
+; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
+; CHECK-NEXT:    call void @callee(ptr [[A]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[A]])
+; CHECK-NEXT:    ret i32 0
+;
+  %a = alloca i32
+  call void @llvm.lifetime.start(i64 4, ptr %a)
+  store i32 0, ptr %a
+  call void @callee(ptr %a)
+  %l1 = load i32, ptr %a
+  call void @llvm.lifetime.end(i64 4, ptr %a)
+  ret i32 %l1
+}
+
+define i32 @twoalloc_with_lifetimes() {
+; CHECK-LABEL: @twoalloc_with_lifetimes(
+; CHECK-NEXT:    [[A:%.*]] = alloca { i32, i32 }, align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr [[A]])
+; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
+; CHECK-NEXT:    store i32 1, ptr [[B]], align 4
+; CHECK-NEXT:    call void @callee(ptr [[A]])
+; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[L2:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT:    [[R:%.*]] = add i32 [[L1]], [[L2]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr [[A]])
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %a = alloca {i32, i32}
+  call void @llvm.lifetime.start(i64 8, ptr %a)
+  store i32 0, ptr %a
+  %b = getelementptr i32, ptr %a, i32 1
+  store i32 1, ptr %b
+  call void @callee(ptr %a)
+  %l1 = load i32, ptr %a
+  %l2 = load i32, ptr %b
+  %r = add i32 %l1, %l2
+  call void @llvm.lifetime.end(i64 8, ptr %a)
+  ret i32 %r
+}
+
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

>From d3825e4d18ed0133bb90235d1c41b18126eb093b Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Fri, 11 Apr 2025 16:41:21 +0200
Subject: [PATCH 2/3] [SROA] Improve handling of lifetimes in load-only
 promotion

The propagateStoredValuesToLoads() transform currently bails out
if there is a lifetime intrinsic spanning the whole alloca, but
the individual loads/stores operate on some smaller part, because
the slice / partition size does not match.

Fix this by ignoring assume-like slices early, regardless of which
range they cover.

I've changed the overall code structure here a bit because I was
getting confused by the different iterators.
---
 llvm/lib/Transforms/Scalar/SROA.cpp           | 96 ++++++++++---------
 .../SROA/non-capturing-call-readonly.ll       |  3 +-
 .../test/Transforms/SROA/readonlynocapture.ll |  4 +-
 3 files changed, 54 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 4e444d8d4cefc..1c0e00e67b87f 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -5498,45 +5498,14 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
   // that do not overlap with any before them. The slices are sorted by
   // increasing beginOffset. We don't use AS.partitions(), as it will use a more
   // sophisticated algorithm that takes splittable slices into account.
-  auto PartitionBegin = AS.begin();
-  auto PartitionEnd = PartitionBegin;
-  uint64_t BeginOffset = PartitionBegin->beginOffset();
-  uint64_t EndOffset = PartitionBegin->endOffset();
-  while (PartitionBegin != AS.end()) {
-    bool AllSameAndValid = true;
-    SmallVector<Instruction *> Insts;
-    Type *PartitionType = nullptr;
-    while (PartitionEnd != AS.end() &&
-           (PartitionEnd->beginOffset() < EndOffset ||
-            PartitionEnd->endOffset() <= EndOffset)) {
-      if (AllSameAndValid) {
-        AllSameAndValid &= PartitionEnd->beginOffset() == BeginOffset &&
-                           PartitionEnd->endOffset() == EndOffset;
-        Instruction *User =
-            cast<Instruction>(PartitionEnd->getUse()->getUser());
-        if (auto *LI = dyn_cast<LoadInst>(User)) {
-          Type *UserTy = LI->getType();
-          // LoadAndStorePromoter requires all the types to be the same.
-          if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
-            AllSameAndValid = false;
-          PartitionType = UserTy;
-          Insts.push_back(User);
-        } else if (auto *SI = dyn_cast<StoreInst>(User)) {
-          Type *UserTy = SI->getValueOperand()->getType();
-          if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
-            AllSameAndValid = false;
-          PartitionType = UserTy;
-          Insts.push_back(User);
-        } else if (!isAssumeLikeIntrinsic(User)) {
-          AllSameAndValid = false;
-        }
-      }
-      EndOffset = std::max(EndOffset, PartitionEnd->endOffset());
-      ++PartitionEnd;
-    }
+  LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
+  bool AllSameAndValid = true;
+  Type *PartitionType = nullptr;
+  SmallVector<Instruction *> Insts;
+  uint64_t BeginOffset = 0;
+  uint64_t EndOffset = 0;
 
-    // So long as all the slices start and end offsets matched, update loads to
-    // the values stored in the partition.
+  auto Flush = [&]() {
     if (AllSameAndValid && !Insts.empty()) {
       LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
                         << EndOffset << ")\n");
@@ -5546,14 +5515,53 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
       BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
       Promoter.run(Insts);
     }
+    AllSameAndValid = true;
+    PartitionType = nullptr;
+    Insts.clear();
+  };
 
-    // Step on to the next partition.
-    PartitionBegin = PartitionEnd;
-    if (PartitionBegin == AS.end())
-      break;
-    BeginOffset = PartitionBegin->beginOffset();
-    EndOffset = PartitionBegin->endOffset();
+  for (Slice &S : AS) {
+    auto *User = cast<Instruction>(S.getUse()->getUser());
+    if (isAssumeLikeIntrinsic(User)) {
+      LLVM_DEBUG({
+        dbgs() << "Ignoring slice: ";
+        AS.print(dbgs(), &S);
+      });
+      continue;
+    }
+    if (S.beginOffset() >= EndOffset) {
+      Flush();
+      BeginOffset = S.beginOffset();
+      EndOffset = S.endOffset();
+    } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
+      LLVM_DEBUG({
+        dbgs() << "Slice does not match range [" << BeginOffset << ", "
+               << EndOffset << ")";
+        AS.print(dbgs(), &S);
+      });
+      AllSameAndValid = false;
+      EndOffset = std::max(EndOffset, S.endOffset());
+    }
+
+    if (auto *LI = dyn_cast<LoadInst>(User)) {
+      Type *UserTy = LI->getType();
+      // LoadAndStorePromoter requires all the types to be the same.
+      if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
+        AllSameAndValid = false;
+      PartitionType = UserTy;
+      Insts.push_back(User);
+    } else if (auto *SI = dyn_cast<StoreInst>(User)) {
+      Type *UserTy = SI->getValueOperand()->getType();
+      if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
+        AllSameAndValid = false;
+      PartitionType = UserTy;
+      Insts.push_back(User);
+    } else {
+      AllSameAndValid = false;
+    }
   }
+
+  Flush();
   return true;
 }
 
diff --git a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
index a37f02df46c75..13808b2aa8916 100644
--- a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
+++ b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
@@ -803,8 +803,7 @@ define i64 @do_schedule_instrs_for_dce_after_fixups() {
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @user_of_alloca(ptr [[ADD_PTR]])
-; CHECK-NEXT:    [[LD:%.*]] = load i64, ptr [[C]], align 4
-; CHECK-NEXT:    ret i64 [[LD]]
+; CHECK-NEXT:    ret i64 0
 ;
 entry:
   %c = alloca i64, align 2
diff --git a/llvm/test/Transforms/SROA/readonlynocapture.ll b/llvm/test/Transforms/SROA/readonlynocapture.ll
index 350da3eb3a005..1cbfe436f9591 100644
--- a/llvm/test/Transforms/SROA/readonlynocapture.ll
+++ b/llvm/test/Transforms/SROA/readonlynocapture.ll
@@ -482,9 +482,7 @@ define i32 @twoalloc_with_lifetimes() {
 ; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
 ; CHECK-NEXT:    store i32 1, ptr [[B]], align 4
 ; CHECK-NEXT:    call void @callee(ptr [[A]])
-; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    [[L2:%.*]] = load i32, ptr [[B]], align 4
-; CHECK-NEXT:    [[R:%.*]] = add i32 [[L1]], [[L2]]
+; CHECK-NEXT:    [[R:%.*]] = add i32 0, 1
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr [[A]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;

>From 2014ba537f3c532a8d692079ec540237a8ce8a58 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Fri, 11 Apr 2025 16:50:23 +0200
Subject: [PATCH 3/3] avoid some redundant work

---
 llvm/lib/Transforms/Scalar/SROA.cpp | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 1c0e00e67b87f..7d49b63a8e4f6 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -5534,13 +5534,16 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
       BeginOffset = S.beginOffset();
       EndOffset = S.endOffset();
     } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
-      LLVM_DEBUG({
-        dbgs() << "Slice does not match range [" << BeginOffset << ", "
-               << EndOffset << ")";
-        AS.print(dbgs(), &S);
-      });
-      AllSameAndValid = false;
+      if (AllSameAndValid) {
+        LLVM_DEBUG({
+          dbgs() << "Slice does not match range [" << BeginOffset << ", "
+                 << EndOffset << ")";
+          AS.print(dbgs(), &S);
+        });
+        AllSameAndValid = false;
+      }
       EndOffset = std::max(EndOffset, S.endOffset());
+      continue;
     }
 
     if (auto *LI = dyn_cast<LoadInst>(User)) {



More information about the llvm-commits mailing list