[llvm] [DSE] Split memory intrinsics if they are dead in the middle (PR #75478)

Nabeel Omer via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 4 02:59:50 PDT 2024


https://github.com/omern1 updated https://github.com/llvm/llvm-project/pull/75478

>From 5b68d4dfcdaff475dcee4f29818ba16ab317a507 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Thu, 14 Dec 2023 18:01:39 +0000
Subject: [PATCH 01/10] Pre-committed tests

---
 .../test/DebugInfo/dse-split-memintrinsics.ll | 117 ++++++++++++++++++
 .../DeadStoreElimination/dead-middle-split.ll |  39 ++++++
 2 files changed, 156 insertions(+)
 create mode 100644 llvm/test/DebugInfo/dse-split-memintrinsics.ll
 create mode 100644 llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll

diff --git a/llvm/test/DebugInfo/dse-split-memintrinsics.ll b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
new file mode 100644
index 0000000000000..1decd22add665
--- /dev/null
+++ b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=dse -S | FileCheck %s
+
+; Check a dbg.assign is inserted that sets the dead middle bits to no-location (see tryToSplitMiddle).
+
+define void @_Z22overwrite_middle_localv() !dbg !23 {
+; CHECK-LABEL: define void @_Z22overwrite_middle_localv(
+; CHECK-SAME: ) !dbg [[DBG23:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BLOB:%.*]] = alloca [1000 x i8], align 16, !DIAssignID [[DIASSIGNID33:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID33]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[BLOB]], i8 5, i64 1000, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(), metadata [[DIASSIGNID35]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 10, !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID36:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID36]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @_Z3escPc(ptr noundef nonnull [[BLOB]]), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
+; CHECK-NEXT:    ret void, !dbg [[DBG34]]
+;
+entry:
+  %blob = alloca [1000 x i8], align 16, !DIAssignID !33
+  call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !33, metadata ptr %blob, metadata !DIExpression()), !dbg !34
+  call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull %blob), !dbg !34
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) %blob, i8 5, i64 1000, i1 false), !dbg !34, !DIAssignID !35
+  call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !35, metadata ptr %blob, metadata !DIExpression()), !dbg !34
+  %add.ptr = getelementptr inbounds i8, ptr %blob, i64 10, !dbg !34
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) %add.ptr, i8 3, i64 980, i1 false), !dbg !34, !DIAssignID !36
+  call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata !36, metadata ptr %add.ptr, metadata !DIExpression()), !dbg !34
+  call void @_Z3escPc(ptr noundef nonnull %blob), !dbg !34
+  call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull %blob), !dbg !34
+  ret void, !dbg !34
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+
+declare void @_Z3escPc(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !16, !17, !18, !19, !20, !21}
+!llvm.ident = !{!22}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, imports: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "debuginfo.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "3dc84462c14a3d86dd372d0473fa13aa")
+!2 = !{!3}
+!3 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !4, entity: !5, file: !14, line: 81)
+!4 = !DINamespace(name: "std", scope: null)
+!5 = !DISubprogram(name: "memset", scope: !6, file: !6, line: 61, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+!6 = !DIFile(filename: "/usr/include/string.h", directory: "", checksumkind: CSK_MD5, checksum: "3fc3efdf2e52b973f380a6e7608374ff")
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9, !10, !11}
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: !12, line: 13, baseType: !13)
+!12 = !DIFile(filename: "build_upstream/lib/clang/18/include/__stddef_size_t.h", directory: "/home", checksumkind: CSK_MD5, checksum: "405db6ea5fb824de326715f26fa9fab5")
+!13 = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned)
+!14 = !DIFile(filename: "/usr/lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13/cstring", directory: "")
+!15 = !{i32 7, !"Dwarf Version", i32 5}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{i32 1, !"wchar_size", i32 4}
+!18 = !{i32 8, !"PIC Level", i32 2}
+!19 = !{i32 7, !"PIE Level", i32 2}
+!20 = !{i32 7, !"uwtable", i32 2}
+!21 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!22 = !{!"clang version 18.0.0"}
+!23 = distinct !DISubprogram(name: "overwrite_middle_local", linkageName: "_Z22overwrite_middle_localv", scope: !24, file: !24, line: 3, type: !25, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !27)
+!24 = !DIFile(filename: "./debuginfo.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "3dc84462c14a3d86dd372d0473fa13aa")
+!25 = !DISubroutineType(types: !26)
+!26 = !{null}
+!27 = !{!28}
+!28 = !DILocalVariable(name: "blob", scope: !23, file: !24, line: 4, type: !29)
+!29 = !DICompositeType(tag: DW_TAG_array_type, baseType: !30, size: 8000, elements: !31)
+!30 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!31 = !{!32}
+!32 = !DISubrange(count: 1000)
+!33 = distinct !DIAssignID()
+!34 = !DILocation(line: 0, scope: !23)
+!35 = distinct !DIAssignID()
+!36 = distinct !DIAssignID()
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, imports: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
+; CHECK: [[META1]] = !DIFile(filename: "debuginfo.cpp", directory: {{.*}})
+; CHECK: [[META2]] = !{[[META3:![0-9]+]]}
+; CHECK: [[META3]] = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: [[META4:![0-9]+]], entity: [[META5:![0-9]+]], file: [[META14:![0-9]+]], line: 81)
+; CHECK: [[META4]] = !DINamespace(name: "std", scope: null)
+; CHECK: [[META5]] = !DISubprogram(name: "memset", scope: [[META6:![0-9]+]], file: [[META6]], line: 61, type: [[META7:![0-9]+]], flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+; CHECK: [[META6]] = !DIFile(filename: "/usr/include/string.h", directory: {{.*}})
+; CHECK: [[META7]] = !DISubroutineType(types: [[META8:![0-9]+]])
+; CHECK: [[META8]] = !{[[META9:![0-9]+]], [[META9]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+; CHECK: [[META9]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+; CHECK: [[META10]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+; CHECK: [[META11]] = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: [[META12:![0-9]+]], line: 13, baseType: [[META13:![0-9]+]])
+; CHECK: [[META12]] = !DIFile(filename: "build_upstream/lib/clang/18/include/__stddef_size_t.h", directory: {{.*}})
+; CHECK: [[META13]] = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned)
+; CHECK: [[META14]] = !DIFile(filename: "/usr/lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13/cstring", directory: "")
+; CHECK: [[DBG23]] = distinct !DISubprogram(name: "overwrite_middle_local", linkageName: "_Z22overwrite_middle_localv", scope: [[META24:![0-9]+]], file: [[META24]], line: 3, type: [[META25:![0-9]+]], scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META27:![0-9]+]])
+; CHECK: [[META24]] = !DIFile(filename: "./debuginfo.cpp", directory: {{.*}})
+; CHECK: [[META25]] = !DISubroutineType(types: [[META26:![0-9]+]])
+; CHECK: [[META26]] = !{null}
+; CHECK: [[META27]] = !{[[META28]]}
+; CHECK: [[META28]] = !DILocalVariable(name: "blob", scope: [[DBG23]], file: [[META24]], line: 4, type: [[META29:![0-9]+]])
+; CHECK: [[META29]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META30:![0-9]+]], size: 8000, elements: [[META31:![0-9]+]])
+; CHECK: [[META30]] = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+; CHECK: [[META31]] = !{[[META32:![0-9]+]]}
+; CHECK: [[META32]] = !DISubrange(count: 1000)
+; CHECK: [[DIASSIGNID33]] = distinct !DIAssignID()
+; CHECK: [[DBG34]] = !DILocation(line: 0, scope: [[DBG23]])
+; CHECK: [[DIASSIGNID35]] = distinct !DIAssignID()
+; CHECK: [[DIASSIGNID36]] = distinct !DIAssignID()
+;.
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
new file mode 100644
index 0000000000000..8d72deefc635b
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+
+; RUN: opt < %s -passes=dse -S | FileCheck %s
+
+define dso_local void @overwrite_middle(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @overwrite_middle(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+  ret void
+}
+
+define dso_local void @overwrite_middle2(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @overwrite_middle2(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+  ret void
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
+

>From 6f26953df74bb8a9c0d6d760870882af7db43670 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Thu, 14 Dec 2023 18:03:20 +0000
Subject: [PATCH 02/10] [DSE] Split memory intrinsics that are dead in the
 middle

DSE can already shorten intrinsics which have dead fronts or rears.
This patch enables DSE to split memory intrinsics that
are dead in the middle into `Front` and `Rear`:

```
  // __Front__                 ___Rear___
  // | ------------- Dead ------------- |
  //         | --- Killing --- |
```

Resolves #72113
---
 .../Scalar/DeadStoreElimination.cpp           | 96 +++++++++++++++++--
 .../test/DebugInfo/dse-split-memintrinsics.ll | 12 ++-
 .../DeadStoreElimination/dead-middle-split.ll |  8 +-
 3 files changed, 103 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 008dcc53fd44f..57ff807745cba 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
@@ -554,6 +555,80 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
   }
 }
 
+static bool tryToSplitMiddle(Instruction *DeadI,
+                             OverlapIntervalsTy &IntervalMap,
+                             int64_t &DeadStart, uint64_t &DeadSize,
+                             const TargetTransformInfo &TTI) {
+  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
+    return false;
+
+  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
+  int64_t KillingStart = OII->second;
+  uint64_t KillingSize = OII->first - KillingStart;
+
+  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
+
+  uint64_t Threshold = TTI.getMaxMemIntrinsicInlineSizeThreshold();
+
+  // __Front__                 ___Rear___
+  // | ------------- Dead ------------- |
+  //         | --- Killing --- |
+
+  if (KillingStart < DeadStart ||
+      uint64_t(KillingStart + KillingSize) > uint64_t(DeadStart + DeadSize))
+    return false;
+
+  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
+  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
+
+  // Assume Front is already correctly aligned.
+  uint64_t FrontSize = KillingStart - DeadStart;
+
+  int64_t RearStart =
+      alignDown(uint64_t(KillingStart + KillingSize), PrefAlign.value());
+  uint64_t RearSize = (DeadStart + DeadSize) - RearStart;
+
+  // If Front and Rear are both bigger than the threshold they won't be inlined
+  // so this seems like a bad idea. If Dead is smaller than the threshold it
+  // will be inlined so this isn't a good idea.
+  if ((FrontSize > Threshold && RearSize > Threshold) || DeadSize < Threshold)
+    return false;
+
+  Value *DeadWriteLength = DeadIntrinsic->getLength();
+  Value *DeadDest = DeadIntrinsic->getRawDest();
+
+  LLVM_DEBUG(dbgs() << "DSE: Split and shortened partially dead store: ["
+                    << DeadStart << ", " << DeadSize + DeadStart
+                    << "]\nInto: Front: [" << DeadStart << ", "
+                    << DeadStart + FrontSize << "], Rear: [" << RearStart
+                    << ", " << RearStart + RearSize << "]\n"
+                    << "Killer: [" << KillingStart << ", "
+                    << KillingSize + KillingStart << "]\n");
+
+  // Dead is now Front.
+  DeadIntrinsic->setLength(
+      ConstantInt::get(DeadWriteLength->getType(), FrontSize));
+  DeadIntrinsic->addDereferenceableParamAttr(0, FrontSize);
+
+  Value *Indices[1] = {ConstantInt::get(DeadWriteLength->getType(), RearStart)};
+  Instruction *RearDestGEP = GetElementPtrInst::CreateInBounds(
+      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, Indices, "",
+      DeadI);
+  auto *Rear = cast<AnyMemIntrinsic>(DeadIntrinsic->clone());
+  Rear->setDest(RearDestGEP);
+  Rear->setLength(ConstantInt::get(DeadWriteLength->getType(), RearSize));
+  Rear->insertAfter(RearDestGEP);
+  Rear->setDestAlignment(PrefAlign);
+  Rear->addDereferenceableParamAttr(0, RearSize);
+
+  shortenAssignment(DeadI, DeadDest, DeadStart * 8, DeadSize * 8, FrontSize * 8,
+                    true);
+
+  IntervalMap.erase(OII);
+  DeadSize = FrontSize;
+  return true;
+}
+
 static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
                          uint64_t &DeadSize, int64_t KillingStart,
                          uint64_t KillingSize, bool IsOverwriteEnd) {
@@ -821,6 +896,7 @@ struct DSEState {
   const TargetLibraryInfo &TLI;
   const DataLayout &DL;
   const LoopInfo &LI;
+  const TargetTransformInfo &TTI;
 
   // Whether the function contains any irreducible control flow, useful for
   // being accurately able to detect loops.
@@ -860,9 +936,10 @@ struct DSEState {
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
-           const LoopInfo &LI)
+           const LoopInfo &LI, const TargetTransformInfo &TTI)
       : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
-        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
+        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI),
+        TTI(TTI) {
     // Collect blocks with throwing instructions not modeled in MemorySSA and
     // alloc-like objects.
     unsigned PO = 0;
@@ -1978,7 +2055,7 @@ struct DSEState {
     return false;
   }
 
-  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
+  bool removePartiallyOverlappedIntrinsicStores(InstOverlapIntervalsTy &IOL) {
     bool Changed = false;
     for (auto OI : IOL) {
       Instruction *DeadI = OI.first;
@@ -1994,6 +2071,9 @@ struct DSEState {
       if (IntervalMap.empty())
         continue;
       Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+      if (IntervalMap.empty())
+        continue;
+      Changed |= tryToSplitMiddle(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
     }
     return Changed;
   }
@@ -2059,10 +2139,11 @@ struct DSEState {
 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                 DominatorTree &DT, PostDominatorTree &PDT,
                                 const TargetLibraryInfo &TLI,
-                                const LoopInfo &LI) {
+                                const LoopInfo &LI,
+                                const TargetTransformInfo &TTI) {
   bool MadeChange = false;
 
-  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI, TTI);
   // For each store:
   for (unsigned I = 0; I < State.MemDefs.size(); I++) {
     MemoryDef *KillingDef = State.MemDefs[I];
@@ -2226,7 +2307,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
 
   if (EnablePartialOverwriteTracking)
     for (auto &KV : State.IOLs)
-      MadeChange |= State.removePartiallyOverlappedStores(KV.second);
+      MadeChange |= State.removePartiallyOverlappedIntrinsicStores(KV.second);
 
   MadeChange |= State.eliminateRedundantStoresOfExistingValues();
   MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
@@ -2244,8 +2325,9 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
 
-  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI, TTI);
 
 #ifdef LLVM_ENABLE_STATS
   if (AreStatisticsEnabled())
diff --git a/llvm/test/DebugInfo/dse-split-memintrinsics.ll b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
index 1decd22add665..bb586ec934ea6 100644
--- a/llvm/test/DebugInfo/dse-split-memintrinsics.ll
+++ b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
@@ -10,11 +10,14 @@ define void @_Z22overwrite_middle_localv() !dbg !23 {
 ; CHECK-NEXT:    [[BLOB:%.*]] = alloca [1000 x i8], align 16, !DIAssignID [[DIASSIGNID33:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID33]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[BLOB]], i8 5, i64 1000, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 976
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(10) [[BLOB]], i8 5, i64 10, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(), metadata [[DIASSIGNID35]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7920), metadata [[META36:![0-9]+]], metadata ptr undef, metadata !DIExpression()), !dbg [[DBG34]]
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 10, !dbg [[DBG34]]
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID36:![0-9]+]]
-; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID36]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID37:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID37]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
 ; CHECK-NEXT:    call void @_Z3escPc(ptr noundef nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    ret void, !dbg [[DBG34]]
@@ -113,5 +116,6 @@ declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata,
 ; CHECK: [[DIASSIGNID33]] = distinct !DIAssignID()
 ; CHECK: [[DBG34]] = !DILocation(line: 0, scope: [[DBG23]])
 ; CHECK: [[DIASSIGNID35]] = distinct !DIAssignID()
-; CHECK: [[DIASSIGNID36]] = distinct !DIAssignID()
+; CHECK: [[META36]] = distinct !DIAssignID()
+; CHECK: [[DIASSIGNID37]] = distinct !DIAssignID()
 ;.
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
index 8d72deefc635b..cf74b6013e2ae 100644
--- a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -6,7 +6,9 @@ define dso_local void @overwrite_middle(ptr nocapture noundef writeonly %X) loca
 ; CHECK-LABEL: define dso_local void @overwrite_middle(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 976
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
 ; CHECK-NEXT:    ret void
@@ -22,7 +24,9 @@ define dso_local void @overwrite_middle2(ptr nocapture noundef writeonly %X) loc
 ; CHECK-LABEL: define dso_local void @overwrite_middle2(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 990
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) [[TMP0]], i8 5, i64 10, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
 ; CHECK-NEXT:    ret void

>From 9fc2f5782d4c6da4d2361be6b15ae15dcbf56376 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 15 Dec 2023 13:30:17 +0000
Subject: [PATCH 03/10] Address review comments

- Remove unnecessary array
- Simplify debuginfo test
- Add tests to cover the profitability check
---
 .../Scalar/DeadStoreElimination.cpp           |  3 +-
 .../test/DebugInfo/dse-split-memintrinsics.ll |  8 ----
 .../DeadStoreElimination/dead-middle-split.ll | 47 +++++++++++++++----
 3 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 57ff807745cba..6766cf6df61ed 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -610,9 +610,8 @@ static bool tryToSplitMiddle(Instruction *DeadI,
       ConstantInt::get(DeadWriteLength->getType(), FrontSize));
   DeadIntrinsic->addDereferenceableParamAttr(0, FrontSize);
 
-  Value *Indices[1] = {ConstantInt::get(DeadWriteLength->getType(), RearStart)};
   Instruction *RearDestGEP = GetElementPtrInst::CreateInBounds(
-      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, Indices, "",
+      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, ConstantInt::get(DeadWriteLength->getType(), RearStart), "",
       DeadI);
   auto *Rear = cast<AnyMemIntrinsic>(DeadIntrinsic->clone());
   Rear->setDest(RearDestGEP);
diff --git a/llvm/test/DebugInfo/dse-split-memintrinsics.ll b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
index bb586ec934ea6..f51f8ce0e6f80 100644
--- a/llvm/test/DebugInfo/dse-split-memintrinsics.ll
+++ b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
@@ -9,7 +9,6 @@ define void @_Z22overwrite_middle_localv() !dbg !23 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BLOB:%.*]] = alloca [1000 x i8], align 16, !DIAssignID [[DIASSIGNID33:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID33]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 976
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(10) [[BLOB]], i8 5, i64 10, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35]]
@@ -19,31 +18,24 @@ define void @_Z22overwrite_middle_localv() !dbg !23 {
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID37:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID37]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
 ; CHECK-NEXT:    call void @_Z3escPc(ptr noundef nonnull [[BLOB]]), !dbg [[DBG34]]
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    ret void, !dbg [[DBG34]]
 ;
 entry:
   %blob = alloca [1000 x i8], align 16, !DIAssignID !33
   call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !33, metadata ptr %blob, metadata !DIExpression()), !dbg !34
-  call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull %blob), !dbg !34
   call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) %blob, i8 5, i64 1000, i1 false), !dbg !34, !DIAssignID !35
   call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !35, metadata ptr %blob, metadata !DIExpression()), !dbg !34
   %add.ptr = getelementptr inbounds i8, ptr %blob, i64 10, !dbg !34
   call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) %add.ptr, i8 3, i64 980, i1 false), !dbg !34, !DIAssignID !36
   call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata !36, metadata ptr %add.ptr, metadata !DIExpression()), !dbg !34
   call void @_Z3escPc(ptr noundef nonnull %blob), !dbg !34
-  call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull %blob), !dbg !34
   ret void, !dbg !34
 }
 
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
-
 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 
 declare void @_Z3escPc(ptr noundef)
 
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
-
 declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
index cf74b6013e2ae..8b20fa346bb66 100644
--- a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -2,9 +2,9 @@
 
 ; RUN: opt < %s -passes=dse -S | FileCheck %s
 
-define dso_local void @overwrite_middle(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
-; CHECK-LABEL: define dso_local void @overwrite_middle(
-; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+define void @overwrite_middle(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @overwrite_middle(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 976
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false)
@@ -20,9 +20,9 @@ entry:
   ret void
 }
 
-define dso_local void @overwrite_middle2(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
-; CHECK-LABEL: define dso_local void @overwrite_middle2(
-; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+define void @overwrite_middle2(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @overwrite_middle2(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 990
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) [[TMP0]], i8 5, i64 10, i1 false)
@@ -38,6 +38,37 @@ entry:
   ret void
 }
 
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
+define void @front_and_rear_bigger_than_threshold(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @front_and_rear_bigger_than_threshold(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 65
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(800) [[ARRAYIDX]], i8 3, i64 800, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 65
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(800) %arrayidx, i8 3, i64 800, i1 false)
+  ret void
+}
+
+define void @dead_smaller_than_threshold(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @dead_smaller_than_threshold(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(50) [[X]], i8 5, i64 50, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(45) [[ARRAYIDX]], i8 3, i64 25, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(50) %X, i8 5, i64 50, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(45) %arrayidx, i8 3, i64 25, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 

>From c9114b5cafdebc1a07d46da823568f101b12d34b Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 15 Dec 2023 13:44:10 +0000
Subject: [PATCH 04/10] Clang format patch

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 6766cf6df61ed..e85bc1b3478a2 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -611,8 +611,8 @@ static bool tryToSplitMiddle(Instruction *DeadI,
   DeadIntrinsic->addDereferenceableParamAttr(0, FrontSize);
 
   Instruction *RearDestGEP = GetElementPtrInst::CreateInBounds(
-      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, ConstantInt::get(DeadWriteLength->getType(), RearStart), "",
-      DeadI);
+      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest,
+      ConstantInt::get(DeadWriteLength->getType(), RearStart), "", DeadI);
   auto *Rear = cast<AnyMemIntrinsic>(DeadIntrinsic->clone());
   Rear->setDest(RearDestGEP);
   Rear->setLength(ConstantInt::get(DeadWriteLength->getType(), RearSize));

>From b52e32b30f85c9b7353b415a00a8e670d2a49eb8 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Tue, 30 Jan 2024 11:32:32 +0000
Subject: [PATCH 05/10] Add check for atomic intrinsics

---
 .../Scalar/DeadStoreElimination.cpp           | 18 ++++++++++++-----
 .../DeadStoreElimination/dead-middle-split.ll | 20 ++++++++++++++++++-
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e85bc1b3478a2..fb8a4e5f6f12e 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -574,8 +574,8 @@ static bool tryToSplitMiddle(Instruction *DeadI,
   // | ------------- Dead ------------- |
   //         | --- Killing --- |
 
-  if (KillingStart < DeadStart ||
-      uint64_t(KillingStart + KillingSize) > uint64_t(DeadStart + DeadSize))
+  if (KillingStart <= DeadStart ||
+      uint64_t(KillingStart + KillingSize) >= uint64_t(DeadStart + DeadSize))
     return false;
 
   auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
@@ -594,6 +594,14 @@ static bool tryToSplitMiddle(Instruction *DeadI,
   if ((FrontSize > Threshold && RearSize > Threshold) || DeadSize < Threshold)
     return false;
 
+  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
+    // When splitting an atomic memory intrinsic, the sizes of Front and Rear
+    // must each be a multiple of the element size.
+    const uint32_t ElementSize = AMI->getElementSizeInBytes();
+    if (FrontSize % ElementSize != 0 || RearSize % ElementSize != 0)
+      return false;
+  }
+
   Value *DeadWriteLength = DeadIntrinsic->getLength();
   Value *DeadDest = DeadIntrinsic->getRawDest();
 
@@ -2066,13 +2074,13 @@ struct DSEState {
       uint64_t DeadSize = Loc.Size.getValue();
       GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
       OverlapIntervalsTy &IntervalMap = OI.second;
-      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
+      Changed |= tryToSplitMiddle(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
       if (IntervalMap.empty())
         continue;
-      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
       if (IntervalMap.empty())
         continue;
-      Changed |= tryToSplitMiddle(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
+      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
     }
     return Changed;
   }
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
index 8b20fa346bb66..80bc38fc74023 100644
--- a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -70,5 +70,23 @@ entry:
   ret void
 }
 
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+define void @dontwrite28to32memset_atomic(ptr nocapture %X) {
+; CHECK-LABEL: define void @dontwrite28to32memset_atomic(
+; CHECK-SAME: ptr nocapture [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 976
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i32 4)
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 dereferenceable(12) [[X]], i8 5, i64 12, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 12
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 [[ARRAYIDX]], i8 3, i64 978, i32 4)
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 %X, i8 5, i64 1000, i32 4)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 12
+  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 %arrayidx, i8 3, i64 978, i32 4)
+  ret void
+}
 
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare void @llvm.memset.element.unordered.atomic.p0.i64(ptr, i8, i64, i32)

>From 001b02b93d3f394a3796478cf289e995b9746d45 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 12 Feb 2024 13:57:18 +0000
Subject: [PATCH 06/10] Empty commit to re-trigger CI


>From ec23d139700b5ca851b398619ac3474d2bdd8704 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 29 Apr 2024 12:09:42 +0100
Subject: [PATCH 07/10] Remove (DeadSize >= Threshold) requirement

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index fb8a4e5f6f12e..c787616229238 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -589,9 +589,8 @@ static bool tryToSplitMiddle(Instruction *DeadI,
   uint64_t RearSize = (DeadStart + DeadSize) - RearStart;
 
   // If Front and Rear are both bigger than the threshold they won't be inlined
-  // so this seems like a bad idea. If Dead is smaller than the threshold it
-  // will be inlined so this isn't a good idea.
-  if ((FrontSize > Threshold && RearSize > Threshold) || DeadSize < Threshold)
+  // in which case we want to bail out.
+  if (FrontSize > Threshold && RearSize > Threshold)
     return false;
 
   if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {

>From bf68f33e86a4f26558a758c733e58b2f53a73256 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 29 Apr 2024 13:53:10 +0100
Subject: [PATCH 08/10] Update tests and add comment

---
 .../Scalar/DeadStoreElimination.cpp           |  1 +
 .../DeadStoreElimination/dead-middle-split.ll |  4 ++-
 .../stores-of-existing-values.ll              | 36 ++++++++++---------
 llvm/test/Transforms/MemCpyOpt/memcpy.ll      |  8 +++--
 4 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c787616229238..205af3b08347e 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -590,6 +590,7 @@ static bool tryToSplitMiddle(Instruction *DeadI,
 
   // If Front and Rear are both bigger than the threshold they won't be inlined
   // in which case we want to bail out.
+  // TODO: This is probably too restrictive.
   if (FrontSize > Threshold && RearSize > Threshold)
     return false;
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
index 80bc38fc74023..508e1e93f777c 100644
--- a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -58,7 +58,9 @@ define void @dead_smaller_than_threshold(ptr nocapture noundef writeonly %X) {
 ; CHECK-LABEL: define void @dead_smaller_than_threshold(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(50) [[X]], i8 5, i64 50, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 35
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(15) [[TMP0]], i8 5, i64 15, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(45) [[ARRAYIDX]], i8 3, i64 25, i1 false)
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
index c9a0943de8cd9..16635bba320f2 100644
--- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
@@ -411,7 +411,11 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
 
 define void @test12_memset_simple(ptr %ptr) {
 ; CHECK-LABEL: @test12_memset_simple(
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[PTR:%.*]], i8 0, i64 10, i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 5
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(5) [[TMP1]], i8 0, i64 5, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr dereferenceable(4) [[PTR]], i8 0, i64 4, i1 false)
+; CHECK-NEXT:    [[PTR_5:%.*]] = getelementptr i8, ptr [[PTR]], i64 4
+; CHECK-NEXT:    store i8 0, ptr [[PTR_5]], align 1
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.memset.p0.i64(ptr %ptr, i8 0, i64 10, i1 false)
@@ -421,19 +425,15 @@ define void @test12_memset_simple(ptr %ptr) {
 }
 
 define void @test12_memset_other_store_in_between(ptr %ptr) {
-; UNOPT-LABEL: @test12_memset_other_store_in_between(
-; UNOPT-NEXT:    call void @llvm.memset.p0.i64(ptr [[PTR:%.*]], i8 0, i64 10, i1 false)
-; UNOPT-NEXT:    [[PTR_4:%.*]] = getelementptr i8, ptr [[PTR]], i64 4
-; UNOPT-NEXT:    store i8 8, ptr [[PTR_4]], align 1
-; UNOPT-NEXT:    [[PTR_5:%.*]] = getelementptr i8, ptr [[PTR]], i64 5
-; UNOPT-NEXT:    store i8 0, ptr [[PTR_5]], align 1
-; UNOPT-NEXT:    ret void
-;
-; OPT-LABEL: @test12_memset_other_store_in_between(
-; OPT-NEXT:    call void @llvm.memset.p0.i64(ptr [[PTR:%.*]], i8 0, i64 10, i1 false)
-; OPT-NEXT:    [[PTR_4:%.*]] = getelementptr i8, ptr [[PTR]], i64 4
-; OPT-NEXT:    store i8 8, ptr [[PTR_4]], align 1
-; OPT-NEXT:    ret void
+; CHECK-LABEL: @test12_memset_other_store_in_between(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 6
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(4) [[TMP1]], i8 0, i64 4, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr dereferenceable(4) [[PTR]], i8 0, i64 4, i1 false)
+; CHECK-NEXT:    [[PTR_4:%.*]] = getelementptr i8, ptr [[PTR]], i64 4
+; CHECK-NEXT:    store i8 8, ptr [[PTR_4]], align 1
+; CHECK-NEXT:    [[PTR_5:%.*]] = getelementptr i8, ptr [[PTR]], i64 5
+; CHECK-NEXT:    store i8 0, ptr [[PTR_5]], align 1
+; CHECK-NEXT:    ret void
 ;
   call void @llvm.memset.p0.i64(ptr %ptr, i8 0, i64 10, i1 false)
   %ptr.4 = getelementptr i8, ptr %ptr, i64 4
@@ -514,7 +514,9 @@ define void @test12_memset_chk_other_store_in_between_stack_obj_escape_before(i6
 
 define void @test12_memset_other_store_in_between_partial_overlap(ptr %ptr) {
 ; CHECK-LABEL: @test12_memset_other_store_in_between_partial_overlap(
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[PTR:%.*]], i8 0, i64 10, i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 7
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(3) [[TMP1]], i8 0, i64 3, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr dereferenceable(4) [[PTR]], i8 0, i64 4, i1 false)
 ; CHECK-NEXT:    [[PTR_4:%.*]] = getelementptr i8, ptr [[PTR]], i64 4
 ; CHECK-NEXT:    store i16 8, ptr [[PTR_4]], align 2
 ; CHECK-NEXT:    [[PTR_5:%.*]] = getelementptr i8, ptr [[PTR]], i64 5
@@ -531,7 +533,9 @@ define void @test12_memset_other_store_in_between_partial_overlap(ptr %ptr) {
 
 define void @test12_memset_later_store_exceeds_memset(ptr %ptr) {
 ; CHECK-LABEL: @test12_memset_later_store_exceeds_memset(
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 8, i1 false)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 5
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(5) [[TMP1]], i8 0, i64 5, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr dereferenceable(4) [[PTR]], i8 0, i64 4, i1 false)
 ; CHECK-NEXT:    [[PTR_4:%.*]] = getelementptr i8, ptr [[PTR]], i64 4
 ; CHECK-NEXT:    store i8 8, ptr [[PTR_4]], align 1
 ; CHECK-NEXT:    [[PTR_5:%.*]] = getelementptr i8, ptr [[PTR]], i64 8
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index cad2170b271e2..b84a278958b85 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -246,7 +246,9 @@ define i32 @test5(i32 %x) nounwind ssp {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[Y:%.*]] = alloca [[STRUCT_S:%.*]], align 16
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[Y]], ptr align 16 @sS, i64 32, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[Y]], i64 16
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 dereferenceable(16) [[TMP0]], ptr align 16 @sS, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 dereferenceable(16) [[Y]], ptr align 16 @sS, i64 16, i1 false)
 ; CHECK-NEXT:    [[A:%.*]] = getelementptr [[STRUCT_S]], ptr [[Y]], i64 0, i32 1, i64 0
 ; CHECK-NEXT:    store i8 4, ptr [[A]], align 1
 ; CHECK-NEXT:    call void @test5a(ptr byval([[STRUCT_S]]) align 16 [[Y]])
@@ -699,7 +701,7 @@ define void @immut_valid_align_branched(i1 %c, ptr noalias align 4 %val) {
 ; Merge/drop noalias metadata when replacing parameter.
 define void @immut_param_noalias_metadata(ptr align 4 byval(i32) %ptr) {
 ; CHECK-LABEL: @immut_param_noalias_metadata(
-; CHECK-NEXT:    store i32 1, ptr [[PTR:%.*]], align 4, !noalias !0
+; CHECK-NEXT:    store i32 1, ptr [[PTR:%.*]], align 4, !noalias [[META0:![0-9]+]]
 ; CHECK-NEXT:    call void @f(ptr noalias nocapture readonly [[PTR]])
 ; CHECK-NEXT:    ret void
 ;
@@ -712,7 +714,7 @@ define void @immut_param_noalias_metadata(ptr align 4 byval(i32) %ptr) {
 
 define void @byval_param_noalias_metadata(ptr align 4 byval(i32) %ptr) {
 ; CHECK-LABEL: @byval_param_noalias_metadata(
-; CHECK-NEXT:    store i32 1, ptr [[PTR:%.*]], align 4, !noalias !0
+; CHECK-NEXT:    store i32 1, ptr [[PTR:%.*]], align 4, !noalias [[META0]]
 ; CHECK-NEXT:    call void @f_byval(ptr byval(i32) align 4 [[PTR]])
 ; CHECK-NEXT:    ret void
 ;

>From 1805544b4472b4fb578c5494cbadbf56c1855333 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Tue, 4 Jun 2024 10:50:36 +0100
Subject: [PATCH 09/10] Prevent splitting if aligning causes all benefits to be
 lost

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 2 +-
 llvm/test/Transforms/MemCpyOpt/memcpy.ll            | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 205af3b08347e..5807a96b6ce48 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -591,7 +591,7 @@ static bool tryToSplitMiddle(Instruction *DeadI,
   // If Front and Rear are both bigger than the threshold they won't be inlined
   // in which case we want to bail out.
   // TODO: This is probably too restrictive.
-  if (FrontSize > Threshold && RearSize > Threshold)
+  if ((uint64_t) RearStart == (DeadStart + FrontSize) || (FrontSize > Threshold && RearSize > Threshold))
     return false;
 
   if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index b84a278958b85..d0ba85847848d 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -246,9 +246,7 @@ define i32 @test5(i32 %x) nounwind ssp {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[Y:%.*]] = alloca [[STRUCT_S:%.*]], align 16
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[Y]], i64 16
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 dereferenceable(16) [[TMP0]], ptr align 16 @sS, i64 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 dereferenceable(16) [[Y]], ptr align 16 @sS, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[Y]], ptr align 16 @sS, i64 32, i1 false)
 ; CHECK-NEXT:    [[A:%.*]] = getelementptr [[STRUCT_S]], ptr [[Y]], i64 0, i32 1, i64 0
 ; CHECK-NEXT:    store i8 4, ptr [[A]], align 1
 ; CHECK-NEXT:    call void @test5a(ptr byval([[STRUCT_S]]) align 16 [[Y]])

>From bc32bcb6d8e7c10a8a141370540fda3adf932d30 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Tue, 4 Jun 2024 10:51:16 +0100
Subject: [PATCH 10/10] Make sure that other transforms operate on the correct
 intrinsic after splitting

---
 .../Transforms/Scalar/DeadStoreElimination.cpp | 10 +++++++++-
 .../stores-of-existing-values.ll               | 18 +++++++++---------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 5807a96b6ce48..fcbca9cbb4be2 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -620,7 +620,7 @@ static bool tryToSplitMiddle(Instruction *DeadI,
 
   Instruction *RearDestGEP = GetElementPtrInst::CreateInBounds(
       Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest,
-      ConstantInt::get(DeadWriteLength->getType(), RearStart), "", DeadI);
+      ConstantInt::get(DeadWriteLength->getType(), RearStart), "rear", DeadI);
   auto *Rear = cast<AnyMemIntrinsic>(DeadIntrinsic->clone());
   Rear->setDest(RearDestGEP);
   Rear->setLength(ConstantInt::get(DeadWriteLength->getType(), RearSize));
@@ -633,6 +633,14 @@ static bool tryToSplitMiddle(Instruction *DeadI,
 
   IntervalMap.erase(OII);
   DeadSize = FrontSize;
+
+  // Make sure that the other transforms operate on the correct intrinsic after splitting.
+  if (!IntervalMap.empty() && IntervalMap.begin()->second >= RearStart) {
+    DeadI = Rear;
+    DeadSize = RearSize;
+    DeadStart = RearStart;
+  }
+
   return true;
 }
 
diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
index 16635bba320f2..899564dcf118f 100644
--- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
@@ -411,8 +411,8 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
 
 define void @test12_memset_simple(ptr %ptr) {
 ; CHECK-LABEL: @test12_memset_simple(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 5
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(5) [[TMP1]], i8 0, i64 5, i1 false)
+; CHECK-NEXT:    [[REAR:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 5
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(5) [[REAR]], i8 0, i64 5, i1 false)
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr dereferenceable(4) [[PTR]], i8 0, i64 4, i1 false)
 ; CHECK-NEXT:    [[PTR_5:%.*]] = getelementptr i8, ptr [[PTR]], i64 4
 ; CHECK-NEXT:    store i8 0, ptr [[PTR_5]], align 1
@@ -426,8 +426,8 @@ define void @test12_memset_simple(ptr %ptr) {
 
 define void @test12_memset_other_store_in_between(ptr %ptr) {
 ; CHECK-LABEL: @test12_memset_other_store_in_between(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 6
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(4) [[TMP1]], i8 0, i64 4, i1 false)
+; CHECK-NEXT:    [[REAR:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 6
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(4) [[REAR]], i8 0, i64 4, i1 false)
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr dereferenceable(4) [[PTR]], i8 0, i64 4, i1 false)
 ; CHECK-NEXT:    [[PTR_4:%.*]] = getelementptr i8, ptr [[PTR]], i64 4
 ; CHECK-NEXT:    store i8 8, ptr [[PTR_4]], align 1
@@ -514,8 +514,8 @@ define void @test12_memset_chk_other_store_in_between_stack_obj_escape_before(i6
 
 define void @test12_memset_other_store_in_between_partial_overlap(ptr %ptr) {
 ; CHECK-LABEL: @test12_memset_other_store_in_between_partial_overlap(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 7
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(3) [[TMP1]], i8 0, i64 3, i1 false)
+; CHECK-NEXT:    [[REAR:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 7
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(3) [[REAR]], i8 0, i64 3, i1 false)
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr dereferenceable(4) [[PTR]], i8 0, i64 4, i1 false)
 ; CHECK-NEXT:    [[PTR_4:%.*]] = getelementptr i8, ptr [[PTR]], i64 4
 ; CHECK-NEXT:    store i16 8, ptr [[PTR_4]], align 2
@@ -533,9 +533,9 @@ define void @test12_memset_other_store_in_between_partial_overlap(ptr %ptr) {
 
 define void @test12_memset_later_store_exceeds_memset(ptr %ptr) {
 ; CHECK-LABEL: @test12_memset_later_store_exceeds_memset(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 5
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(5) [[TMP1]], i8 0, i64 5, i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr dereferenceable(4) [[PTR]], i8 0, i64 4, i1 false)
+; CHECK-NEXT:    [[REAR:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 5
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(5) [[REAR]], i8 0, i64 5, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 dereferenceable(4) [[PTR]], i8 0, i64 3, i1 false)
 ; CHECK-NEXT:    [[PTR_4:%.*]] = getelementptr i8, ptr [[PTR]], i64 4
 ; CHECK-NEXT:    store i8 8, ptr [[PTR_4]], align 1
 ; CHECK-NEXT:    [[PTR_5:%.*]] = getelementptr i8, ptr [[PTR]], i64 8



More information about the llvm-commits mailing list