[llvm] [DSE] Split memory intrinsics if they are dead in the middle (PR #75478)

Nabeel Omer via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 29 04:10:05 PDT 2024


https://github.com/omern1 updated https://github.com/llvm/llvm-project/pull/75478

>From 5b68d4dfcdaff475dcee4f29818ba16ab317a507 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Thu, 14 Dec 2023 18:01:39 +0000
Subject: [PATCH 1/7] Pre-committed tests

---
 .../test/DebugInfo/dse-split-memintrinsics.ll | 117 ++++++++++++++++++
 .../DeadStoreElimination/dead-middle-split.ll |  39 ++++++
 2 files changed, 156 insertions(+)
 create mode 100644 llvm/test/DebugInfo/dse-split-memintrinsics.ll
 create mode 100644 llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll

diff --git a/llvm/test/DebugInfo/dse-split-memintrinsics.ll b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
new file mode 100644
index 00000000000000..1decd22add665b
--- /dev/null
+++ b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=dse -S | FileCheck %s
+
+; Check a dbg.assign is inserted that sets the dead middle bits to no-location (see tryToSplitMiddle).
+
+define void @_Z22overwrite_middle_localv() !dbg !23 {
+; CHECK-LABEL: define void @_Z22overwrite_middle_localv(
+; CHECK-SAME: ) !dbg [[DBG23:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BLOB:%.*]] = alloca [1000 x i8], align 16, !DIAssignID [[DIASSIGNID33:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID33]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[BLOB]], i8 5, i64 1000, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(), metadata [[DIASSIGNID35]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 10, !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID36:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID36]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @_Z3escPc(ptr noundef nonnull [[BLOB]]), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
+; CHECK-NEXT:    ret void, !dbg [[DBG34]]
+;
+entry:
+  %blob = alloca [1000 x i8], align 16, !DIAssignID !33
+  call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !33, metadata ptr %blob, metadata !DIExpression()), !dbg !34
+  call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull %blob), !dbg !34
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) %blob, i8 5, i64 1000, i1 false), !dbg !34, !DIAssignID !35
+  call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !35, metadata ptr %blob, metadata !DIExpression()), !dbg !34
+  %add.ptr = getelementptr inbounds i8, ptr %blob, i64 10, !dbg !34
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) %add.ptr, i8 3, i64 980, i1 false), !dbg !34, !DIAssignID !36
+  call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata !36, metadata ptr %add.ptr, metadata !DIExpression()), !dbg !34
+  call void @_Z3escPc(ptr noundef nonnull %blob), !dbg !34
+  call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull %blob), !dbg !34
+  ret void, !dbg !34
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+
+declare void @_Z3escPc(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !16, !17, !18, !19, !20, !21}
+!llvm.ident = !{!22}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, imports: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "debuginfo.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "3dc84462c14a3d86dd372d0473fa13aa")
+!2 = !{!3}
+!3 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !4, entity: !5, file: !14, line: 81)
+!4 = !DINamespace(name: "std", scope: null)
+!5 = !DISubprogram(name: "memset", scope: !6, file: !6, line: 61, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+!6 = !DIFile(filename: "/usr/include/string.h", directory: "", checksumkind: CSK_MD5, checksum: "3fc3efdf2e52b973f380a6e7608374ff")
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9, !10, !11}
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: !12, line: 13, baseType: !13)
+!12 = !DIFile(filename: "build_upstream/lib/clang/18/include/__stddef_size_t.h", directory: "/home", checksumkind: CSK_MD5, checksum: "405db6ea5fb824de326715f26fa9fab5")
+!13 = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned)
+!14 = !DIFile(filename: "/usr/lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13/cstring", directory: "")
+!15 = !{i32 7, !"Dwarf Version", i32 5}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{i32 1, !"wchar_size", i32 4}
+!18 = !{i32 8, !"PIC Level", i32 2}
+!19 = !{i32 7, !"PIE Level", i32 2}
+!20 = !{i32 7, !"uwtable", i32 2}
+!21 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!22 = !{!"clang version 18.0.0"}
+!23 = distinct !DISubprogram(name: "overwrite_middle_local", linkageName: "_Z22overwrite_middle_localv", scope: !24, file: !24, line: 3, type: !25, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !27)
+!24 = !DIFile(filename: "./debuginfo.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "3dc84462c14a3d86dd372d0473fa13aa")
+!25 = !DISubroutineType(types: !26)
+!26 = !{null}
+!27 = !{!28}
+!28 = !DILocalVariable(name: "blob", scope: !23, file: !24, line: 4, type: !29)
+!29 = !DICompositeType(tag: DW_TAG_array_type, baseType: !30, size: 8000, elements: !31)
+!30 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!31 = !{!32}
+!32 = !DISubrange(count: 1000)
+!33 = distinct !DIAssignID()
+!34 = !DILocation(line: 0, scope: !23)
+!35 = distinct !DIAssignID()
+!36 = distinct !DIAssignID()
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, imports: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
+; CHECK: [[META1]] = !DIFile(filename: "debuginfo.cpp", directory: {{.*}})
+; CHECK: [[META2]] = !{[[META3:![0-9]+]]}
+; CHECK: [[META3]] = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: [[META4:![0-9]+]], entity: [[META5:![0-9]+]], file: [[META14:![0-9]+]], line: 81)
+; CHECK: [[META4]] = !DINamespace(name: "std", scope: null)
+; CHECK: [[META5]] = !DISubprogram(name: "memset", scope: [[META6:![0-9]+]], file: [[META6]], line: 61, type: [[META7:![0-9]+]], flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+; CHECK: [[META6]] = !DIFile(filename: "/usr/include/string.h", directory: {{.*}})
+; CHECK: [[META7]] = !DISubroutineType(types: [[META8:![0-9]+]])
+; CHECK: [[META8]] = !{[[META9:![0-9]+]], [[META9]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+; CHECK: [[META9]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+; CHECK: [[META10]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+; CHECK: [[META11]] = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: [[META12:![0-9]+]], line: 13, baseType: [[META13:![0-9]+]])
+; CHECK: [[META12]] = !DIFile(filename: "build_upstream/lib/clang/18/include/__stddef_size_t.h", directory: {{.*}})
+; CHECK: [[META13]] = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned)
+; CHECK: [[META14]] = !DIFile(filename: "/usr/lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13/cstring", directory: "")
+; CHECK: [[DBG23]] = distinct !DISubprogram(name: "overwrite_middle_local", linkageName: "_Z22overwrite_middle_localv", scope: [[META24:![0-9]+]], file: [[META24]], line: 3, type: [[META25:![0-9]+]], scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META27:![0-9]+]])
+; CHECK: [[META24]] = !DIFile(filename: "./debuginfo.cpp", directory: {{.*}})
+; CHECK: [[META25]] = !DISubroutineType(types: [[META26:![0-9]+]])
+; CHECK: [[META26]] = !{null}
+; CHECK: [[META27]] = !{[[META28]]}
+; CHECK: [[META28]] = !DILocalVariable(name: "blob", scope: [[DBG23]], file: [[META24]], line: 4, type: [[META29:![0-9]+]])
+; CHECK: [[META29]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META30:![0-9]+]], size: 8000, elements: [[META31:![0-9]+]])
+; CHECK: [[META30]] = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+; CHECK: [[META31]] = !{[[META32:![0-9]+]]}
+; CHECK: [[META32]] = !DISubrange(count: 1000)
+; CHECK: [[DIASSIGNID33]] = distinct !DIAssignID()
+; CHECK: [[DBG34]] = !DILocation(line: 0, scope: [[DBG23]])
+; CHECK: [[DIASSIGNID35]] = distinct !DIAssignID()
+; CHECK: [[DIASSIGNID36]] = distinct !DIAssignID()
+;.
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
new file mode 100644
index 00000000000000..8d72deefc635b2
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+
+; RUN: opt < %s -passes=dse -S | FileCheck %s
+
+define dso_local void @overwrite_middle(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @overwrite_middle(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+  ret void
+}
+
+define dso_local void @overwrite_middle2(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @overwrite_middle2(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+  ret void
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
+

>From 6f26953df74bb8a9c0d6d760870882af7db43670 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Thu, 14 Dec 2023 18:03:20 +0000
Subject: [PATCH 2/7] [DSE] Split memory intrinsics that are dead in the middle

DSE can already shorten intrinsics which have dead fronts or rears.
This patch enables DSE to split memory intrinsics that
are dead in the middle into `Front` and `Rear`:

```
  // __Front__                 ___Rear___
  // | ------------- Dead ------------- |
  //         | --- Killing --- |
```

Resolves #72113
---
 .../Scalar/DeadStoreElimination.cpp           | 96 +++++++++++++++++--
 .../test/DebugInfo/dse-split-memintrinsics.ll | 12 ++-
 .../DeadStoreElimination/dead-middle-split.ll |  8 +-
 3 files changed, 103 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 008dcc53fd44fc..57ff807745cba9 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
@@ -554,6 +555,80 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
   }
 }
 
+static bool tryToSplitMiddle(Instruction *DeadI,
+                             OverlapIntervalsTy &IntervalMap,
+                             int64_t &DeadStart, uint64_t &DeadSize,
+                             const TargetTransformInfo &TTI) {
+  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
+    return false;
+
+  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
+  int64_t KillingStart = OII->second;
+  uint64_t KillingSize = OII->first - KillingStart;
+
+  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
+
+  uint64_t Threshold = TTI.getMaxMemIntrinsicInlineSizeThreshold();
+
+  // __Front__                 ___Rear___
+  // | ------------- Dead ------------- |
+  //         | --- Killing --- |
+
+  if (KillingStart < DeadStart ||
+      uint64_t(KillingStart + KillingSize) > uint64_t(DeadStart + DeadSize))
+    return false;
+
+  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
+  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
+
+  // Front reuses Dead's destination pointer, so its alignment is taken as-is.
+  uint64_t FrontSize = KillingStart - DeadStart;
+
+  int64_t RearStart =
+      alignDown(uint64_t(KillingStart + KillingSize), PrefAlign.value());
+  uint64_t RearSize = (DeadStart + DeadSize) - RearStart;
+
+  // Profitability: skip when Front and Rear both exceed the TTI inline-size
+  // threshold (neither piece is expected to be inlined), and skip when Dead
+  // is already below that threshold (it is expected to be inlined as-is).
+  if ((FrontSize > Threshold && RearSize > Threshold) || DeadSize < Threshold)
+    return false;
+
+  Value *DeadWriteLength = DeadIntrinsic->getLength();
+  Value *DeadDest = DeadIntrinsic->getRawDest();
+
+  LLVM_DEBUG(dbgs() << "DSE: Split and shortened partially dead store: ["
+                    << DeadStart << ", " << DeadSize + DeadStart
+                    << "]\nInto: Front: [" << DeadStart << ", "
+                    << DeadStart + FrontSize << "], Rear: [" << RearStart
+                    << ", " << RearStart + RearSize << "]\n"
+                    << "Killer: [" << KillingStart << ", "
+                    << KillingSize + KillingStart << "]\n");
+
+  // Dead becomes Front. NOTE(review): the GEP below offsets DeadDest by RearStart, not RearStart - DeadStart; confirm.
+  DeadIntrinsic->setLength(
+      ConstantInt::get(DeadWriteLength->getType(), FrontSize));
+  DeadIntrinsic->addDereferenceableParamAttr(0, FrontSize);
+
+  Value *Indices[1] = {ConstantInt::get(DeadWriteLength->getType(), RearStart)};
+  Instruction *RearDestGEP = GetElementPtrInst::CreateInBounds(
+      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, Indices, "",
+      DeadI);
+  auto *Rear = cast<AnyMemIntrinsic>(DeadIntrinsic->clone());
+  Rear->setDest(RearDestGEP);
+  Rear->setLength(ConstantInt::get(DeadWriteLength->getType(), RearSize));
+  Rear->insertAfter(RearDestGEP);
+  Rear->setDestAlignment(PrefAlign);
+  Rear->addDereferenceableParamAttr(0, RearSize);
+
+  shortenAssignment(DeadI, DeadDest, DeadStart * 8, DeadSize * 8, FrontSize * 8,
+                    true);
+
+  IntervalMap.erase(OII);
+  DeadSize = FrontSize;
+  return true;
+}
+
 static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
                          uint64_t &DeadSize, int64_t KillingStart,
                          uint64_t KillingSize, bool IsOverwriteEnd) {
@@ -821,6 +896,7 @@ struct DSEState {
   const TargetLibraryInfo &TLI;
   const DataLayout &DL;
   const LoopInfo &LI;
+  const TargetTransformInfo &TTI;
 
   // Whether the function contains any irreducible control flow, useful for
   // being accurately able to detect loops.
@@ -860,9 +936,10 @@ struct DSEState {
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
-           const LoopInfo &LI)
+           const LoopInfo &LI, const TargetTransformInfo &TTI)
       : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
-        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
+        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI),
+        TTI(TTI) {
     // Collect blocks with throwing instructions not modeled in MemorySSA and
     // alloc-like objects.
     unsigned PO = 0;
@@ -1978,7 +2055,7 @@ struct DSEState {
     return false;
   }
 
-  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
+  bool removePartiallyOverlappedIntrinsicStores(InstOverlapIntervalsTy &IOL) {
     bool Changed = false;
     for (auto OI : IOL) {
       Instruction *DeadI = OI.first;
@@ -1994,6 +2071,9 @@ struct DSEState {
       if (IntervalMap.empty())
         continue;
       Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+      if (IntervalMap.empty())
+        continue;
+      Changed |= tryToSplitMiddle(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
     }
     return Changed;
   }
@@ -2059,10 +2139,11 @@ struct DSEState {
 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                 DominatorTree &DT, PostDominatorTree &PDT,
                                 const TargetLibraryInfo &TLI,
-                                const LoopInfo &LI) {
+                                const LoopInfo &LI,
+                                const TargetTransformInfo &TTI) {
   bool MadeChange = false;
 
-  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI, TTI);
   // For each store:
   for (unsigned I = 0; I < State.MemDefs.size(); I++) {
     MemoryDef *KillingDef = State.MemDefs[I];
@@ -2226,7 +2307,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
 
   if (EnablePartialOverwriteTracking)
     for (auto &KV : State.IOLs)
-      MadeChange |= State.removePartiallyOverlappedStores(KV.second);
+      MadeChange |= State.removePartiallyOverlappedIntrinsicStores(KV.second);
 
   MadeChange |= State.eliminateRedundantStoresOfExistingValues();
   MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
@@ -2244,8 +2325,9 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
 
-  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI, TTI);
 
 #ifdef LLVM_ENABLE_STATS
   if (AreStatisticsEnabled())
diff --git a/llvm/test/DebugInfo/dse-split-memintrinsics.ll b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
index 1decd22add665b..bb586ec934ea6a 100644
--- a/llvm/test/DebugInfo/dse-split-memintrinsics.ll
+++ b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
@@ -10,11 +10,14 @@ define void @_Z22overwrite_middle_localv() !dbg !23 {
 ; CHECK-NEXT:    [[BLOB:%.*]] = alloca [1000 x i8], align 16, !DIAssignID [[DIASSIGNID33:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID33]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[BLOB]], i8 5, i64 1000, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 976
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(10) [[BLOB]], i8 5, i64 10, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(), metadata [[DIASSIGNID35]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7920), metadata [[META36:![0-9]+]], metadata ptr undef, metadata !DIExpression()), !dbg [[DBG34]]
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 10, !dbg [[DBG34]]
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID36:![0-9]+]]
-; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID36]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID37:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID37]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
 ; CHECK-NEXT:    call void @_Z3escPc(ptr noundef nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    ret void, !dbg [[DBG34]]
@@ -113,5 +116,6 @@ declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata,
 ; CHECK: [[DIASSIGNID33]] = distinct !DIAssignID()
 ; CHECK: [[DBG34]] = !DILocation(line: 0, scope: [[DBG23]])
 ; CHECK: [[DIASSIGNID35]] = distinct !DIAssignID()
-; CHECK: [[DIASSIGNID36]] = distinct !DIAssignID()
+; CHECK: [[META36]] = distinct !DIAssignID()
+; CHECK: [[DIASSIGNID37]] = distinct !DIAssignID()
 ;.
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
index 8d72deefc635b2..cf74b6013e2ae4 100644
--- a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -6,7 +6,9 @@ define dso_local void @overwrite_middle(ptr nocapture noundef writeonly %X) loca
 ; CHECK-LABEL: define dso_local void @overwrite_middle(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 976
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
 ; CHECK-NEXT:    ret void
@@ -22,7 +24,9 @@ define dso_local void @overwrite_middle2(ptr nocapture noundef writeonly %X) loc
 ; CHECK-LABEL: define dso_local void @overwrite_middle2(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 990
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) [[TMP0]], i8 5, i64 10, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
 ; CHECK-NEXT:    ret void

>From 9fc2f5782d4c6da4d2361be6b15ae15dcbf56376 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 15 Dec 2023 13:30:17 +0000
Subject: [PATCH 3/7] Address review comments

- Remove unnecessary array
- Simplify debuginfo test
- Add tests to cover the profitability check
---
 .../Scalar/DeadStoreElimination.cpp           |  3 +-
 .../test/DebugInfo/dse-split-memintrinsics.ll |  8 ----
 .../DeadStoreElimination/dead-middle-split.ll | 47 +++++++++++++++----
 3 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 57ff807745cba9..6766cf6df61ed1 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -610,9 +610,8 @@ static bool tryToSplitMiddle(Instruction *DeadI,
       ConstantInt::get(DeadWriteLength->getType(), FrontSize));
   DeadIntrinsic->addDereferenceableParamAttr(0, FrontSize);
 
-  Value *Indices[1] = {ConstantInt::get(DeadWriteLength->getType(), RearStart)};
   Instruction *RearDestGEP = GetElementPtrInst::CreateInBounds(
-      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, Indices, "",
+      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, ConstantInt::get(DeadWriteLength->getType(), RearStart), "",
       DeadI);
   auto *Rear = cast<AnyMemIntrinsic>(DeadIntrinsic->clone());
   Rear->setDest(RearDestGEP);
diff --git a/llvm/test/DebugInfo/dse-split-memintrinsics.ll b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
index bb586ec934ea6a..f51f8ce0e6f805 100644
--- a/llvm/test/DebugInfo/dse-split-memintrinsics.ll
+++ b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
@@ -9,7 +9,6 @@ define void @_Z22overwrite_middle_localv() !dbg !23 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BLOB:%.*]] = alloca [1000 x i8], align 16, !DIAssignID [[DIASSIGNID33:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID33]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 976
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(10) [[BLOB]], i8 5, i64 10, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35]]
@@ -19,31 +18,24 @@ define void @_Z22overwrite_middle_localv() !dbg !23 {
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID37:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID37]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
 ; CHECK-NEXT:    call void @_Z3escPc(ptr noundef nonnull [[BLOB]]), !dbg [[DBG34]]
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    ret void, !dbg [[DBG34]]
 ;
 entry:
   %blob = alloca [1000 x i8], align 16, !DIAssignID !33
   call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !33, metadata ptr %blob, metadata !DIExpression()), !dbg !34
-  call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull %blob), !dbg !34
   call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) %blob, i8 5, i64 1000, i1 false), !dbg !34, !DIAssignID !35
   call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !35, metadata ptr %blob, metadata !DIExpression()), !dbg !34
   %add.ptr = getelementptr inbounds i8, ptr %blob, i64 10, !dbg !34
   call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) %add.ptr, i8 3, i64 980, i1 false), !dbg !34, !DIAssignID !36
   call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata !36, metadata ptr %add.ptr, metadata !DIExpression()), !dbg !34
   call void @_Z3escPc(ptr noundef nonnull %blob), !dbg !34
-  call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull %blob), !dbg !34
   ret void, !dbg !34
 }
 
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
-
 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 
 declare void @_Z3escPc(ptr noundef)
 
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
-
 declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
index cf74b6013e2ae4..8b20fa346bb661 100644
--- a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -2,9 +2,9 @@
 
 ; RUN: opt < %s -passes=dse -S | FileCheck %s
 
-define dso_local void @overwrite_middle(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
-; CHECK-LABEL: define dso_local void @overwrite_middle(
-; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+define void @overwrite_middle(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @overwrite_middle(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 976
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false)
@@ -20,9 +20,9 @@ entry:
   ret void
 }
 
-define dso_local void @overwrite_middle2(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
-; CHECK-LABEL: define dso_local void @overwrite_middle2(
-; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+define void @overwrite_middle2(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @overwrite_middle2(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 990
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) [[TMP0]], i8 5, i64 10, i1 false)
@@ -38,6 +38,37 @@ entry:
   ret void
 }
 
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
+define void @front_and_rear_bigger_than_threshold(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @front_and_rear_bigger_than_threshold(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 65
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(800) [[ARRAYIDX]], i8 3, i64 800, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 65
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(800) %arrayidx, i8 3, i64 800, i1 false)
+  ret void
+}
+
+define void @dead_smaller_than_threshold(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @dead_smaller_than_threshold(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(50) [[X]], i8 5, i64 50, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(45) [[ARRAYIDX]], i8 3, i64 25, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(50) %X, i8 5, i64 50, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(45) %arrayidx, i8 3, i64 25, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 

>From c9114b5cafdebc1a07d46da823568f101b12d34b Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 15 Dec 2023 13:44:10 +0000
Subject: [PATCH 4/7] Clang format patch

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 6766cf6df61ed1..e85bc1b3478a23 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -611,8 +611,8 @@ static bool tryToSplitMiddle(Instruction *DeadI,
   DeadIntrinsic->addDereferenceableParamAttr(0, FrontSize);
 
   Instruction *RearDestGEP = GetElementPtrInst::CreateInBounds(
-      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, ConstantInt::get(DeadWriteLength->getType(), RearStart), "",
-      DeadI);
+      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest,
+      ConstantInt::get(DeadWriteLength->getType(), RearStart), "", DeadI);
   auto *Rear = cast<AnyMemIntrinsic>(DeadIntrinsic->clone());
   Rear->setDest(RearDestGEP);
   Rear->setLength(ConstantInt::get(DeadWriteLength->getType(), RearSize));

>From b52e32b30f85c9b7353b415a00a8e670d2a49eb8 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Tue, 30 Jan 2024 11:32:32 +0000
Subject: [PATCH 5/7] Add check for atomic intrinsics

---
 .../Scalar/DeadStoreElimination.cpp           | 18 ++++++++++++-----
 .../DeadStoreElimination/dead-middle-split.ll | 20 ++++++++++++++++++-
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e85bc1b3478a23..fb8a4e5f6f12e5 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -574,8 +574,8 @@ static bool tryToSplitMiddle(Instruction *DeadI,
   // | ------------- Dead ------------- |
   //         | --- Killing --- |
 
-  if (KillingStart < DeadStart ||
-      uint64_t(KillingStart + KillingSize) > uint64_t(DeadStart + DeadSize))
+  if (KillingStart <= DeadStart ||
+      uint64_t(KillingStart + KillingSize) >= uint64_t(DeadStart + DeadSize))
     return false;
 
   auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
@@ -594,6 +594,14 @@ static bool tryToSplitMiddle(Instruction *DeadI,
   if ((FrontSize > Threshold && RearSize > Threshold) || DeadSize < Threshold)
     return false;
 
+  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
+    // When shortening an atomic memory intrinsic, the sizes of Front and Rear
+    // must each be a multiple of the element size.
+    const uint32_t ElementSize = AMI->getElementSizeInBytes();
+    if (FrontSize % ElementSize != 0 || RearSize % ElementSize != 0)
+      return false;
+  }
+
   Value *DeadWriteLength = DeadIntrinsic->getLength();
   Value *DeadDest = DeadIntrinsic->getRawDest();
 
@@ -2066,13 +2074,13 @@ struct DSEState {
       uint64_t DeadSize = Loc.Size.getValue();
       GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
       OverlapIntervalsTy &IntervalMap = OI.second;
-      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
+      Changed |= tryToSplitMiddle(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
       if (IntervalMap.empty())
         continue;
-      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
       if (IntervalMap.empty())
         continue;
-      Changed |= tryToSplitMiddle(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
+      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
     }
     return Changed;
   }
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
index 8b20fa346bb661..80bc38fc740231 100644
--- a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -70,5 +70,23 @@ entry:
   ret void
 }
 
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+define void @dontwrite28to32memset_atomic(ptr nocapture %X) {
+; CHECK-LABEL: define void @dontwrite28to32memset_atomic(
+; CHECK-SAME: ptr nocapture [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 976
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i32 4)
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 dereferenceable(12) [[X]], i8 5, i64 12, i32 4)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 12
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 [[ARRAYIDX]], i8 3, i64 978, i32 4)
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 %X, i8 5, i64 1000, i32 4)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 12
+  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 %arrayidx, i8 3, i64 978, i32 4)
+  ret void
+}
 
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare void @llvm.memset.element.unordered.atomic.p0.i64(ptr, i8, i64, i32)

>From 001b02b93d3f394a3796478cf289e995b9746d45 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 12 Feb 2024 13:57:18 +0000
Subject: [PATCH 6/7] Empty commit to re-trigger CI


>From ec23d139700b5ca851b398619ac3474d2bdd8704 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 29 Apr 2024 12:09:42 +0100
Subject: [PATCH 7/7] Remove (DeadSize >= Threshold) requirement

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index fb8a4e5f6f12e5..c787616229238a 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -589,9 +589,8 @@ static bool tryToSplitMiddle(Instruction *DeadI,
   uint64_t RearSize = (DeadStart + DeadSize) - RearStart;
 
   // If Front and Rear are both bigger than the threshold they won't be inlined
-  // so this seems like a bad idea. If Dead is smaller than the threshold it
-  // will be inlined so this isn't a good idea.
-  if ((FrontSize > Threshold && RearSize > Threshold) || DeadSize < Threshold)
+  // in which case we want to bail out.
+  if (FrontSize > Threshold && RearSize > Threshold)
     return false;
 
   if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {



More information about the llvm-commits mailing list