[llvm] [DSE] Split memory intrinsics if they are dead in the middle (PR #75478)

Nabeel Omer via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 15 05:44:24 PST 2023


https://github.com/omern1 updated https://github.com/llvm/llvm-project/pull/75478

>From 5b68d4dfcdaff475dcee4f29818ba16ab317a507 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Thu, 14 Dec 2023 18:01:39 +0000
Subject: [PATCH 1/4] Pre-committed tests

---
 .../test/DebugInfo/dse-split-memintrinsics.ll | 117 ++++++++++++++++++
 .../DeadStoreElimination/dead-middle-split.ll |  39 ++++++
 2 files changed, 156 insertions(+)
 create mode 100644 llvm/test/DebugInfo/dse-split-memintrinsics.ll
 create mode 100644 llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll

diff --git a/llvm/test/DebugInfo/dse-split-memintrinsics.ll b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
new file mode 100644
index 00000000000000..1decd22add665b
--- /dev/null
+++ b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=dse -S | FileCheck %s
+
+; Check a dbg.assign is inserted that sets the dead middle bits to no-location (see tryToSplitMiddle).
+
+define void @_Z22overwrite_middle_localv() !dbg !23 {
+; CHECK-LABEL: define void @_Z22overwrite_middle_localv(
+; CHECK-SAME: ) !dbg [[DBG23:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BLOB:%.*]] = alloca [1000 x i8], align 16, !DIAssignID [[DIASSIGNID33:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID33]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[BLOB]], i8 5, i64 1000, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(), metadata [[DIASSIGNID35]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 10, !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID36:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID36]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @_Z3escPc(ptr noundef nonnull [[BLOB]]), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
+; CHECK-NEXT:    ret void, !dbg [[DBG34]]
+;
+entry:
+  %blob = alloca [1000 x i8], align 16, !DIAssignID !33
+  call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !33, metadata ptr %blob, metadata !DIExpression()), !dbg !34
+  call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull %blob), !dbg !34
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) %blob, i8 5, i64 1000, i1 false), !dbg !34, !DIAssignID !35
+  call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !35, metadata ptr %blob, metadata !DIExpression()), !dbg !34
+  %add.ptr = getelementptr inbounds i8, ptr %blob, i64 10, !dbg !34
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) %add.ptr, i8 3, i64 980, i1 false), !dbg !34, !DIAssignID !36
+  call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata !36, metadata ptr %add.ptr, metadata !DIExpression()), !dbg !34
+  call void @_Z3escPc(ptr noundef nonnull %blob), !dbg !34
+  call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull %blob), !dbg !34
+  ret void, !dbg !34
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+
+declare void @_Z3escPc(ptr noundef)
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
+
+declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !16, !17, !18, !19, !20, !21}
+!llvm.ident = !{!22}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, imports: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "debuginfo.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "3dc84462c14a3d86dd372d0473fa13aa")
+!2 = !{!3}
+!3 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !4, entity: !5, file: !14, line: 81)
+!4 = !DINamespace(name: "std", scope: null)
+!5 = !DISubprogram(name: "memset", scope: !6, file: !6, line: 61, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+!6 = !DIFile(filename: "/usr/include/string.h", directory: "", checksumkind: CSK_MD5, checksum: "3fc3efdf2e52b973f380a6e7608374ff")
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9, !10, !11}
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: !12, line: 13, baseType: !13)
+!12 = !DIFile(filename: "build_upstream/lib/clang/18/include/__stddef_size_t.h", directory: "/home", checksumkind: CSK_MD5, checksum: "405db6ea5fb824de326715f26fa9fab5")
+!13 = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned)
+!14 = !DIFile(filename: "/usr/lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13/cstring", directory: "")
+!15 = !{i32 7, !"Dwarf Version", i32 5}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{i32 1, !"wchar_size", i32 4}
+!18 = !{i32 8, !"PIC Level", i32 2}
+!19 = !{i32 7, !"PIE Level", i32 2}
+!20 = !{i32 7, !"uwtable", i32 2}
+!21 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!22 = !{!"clang version 18.0.0"}
+!23 = distinct !DISubprogram(name: "overwrite_middle_local", linkageName: "_Z22overwrite_middle_localv", scope: !24, file: !24, line: 3, type: !25, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !27)
+!24 = !DIFile(filename: "./debuginfo.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "3dc84462c14a3d86dd372d0473fa13aa")
+!25 = !DISubroutineType(types: !26)
+!26 = !{null}
+!27 = !{!28}
+!28 = !DILocalVariable(name: "blob", scope: !23, file: !24, line: 4, type: !29)
+!29 = !DICompositeType(tag: DW_TAG_array_type, baseType: !30, size: 8000, elements: !31)
+!30 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!31 = !{!32}
+!32 = !DISubrange(count: 1000)
+!33 = distinct !DIAssignID()
+!34 = !DILocation(line: 0, scope: !23)
+!35 = distinct !DIAssignID()
+!36 = distinct !DIAssignID()
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, imports: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
+; CHECK: [[META1]] = !DIFile(filename: "debuginfo.cpp", directory: {{.*}})
+; CHECK: [[META2]] = !{[[META3:![0-9]+]]}
+; CHECK: [[META3]] = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: [[META4:![0-9]+]], entity: [[META5:![0-9]+]], file: [[META14:![0-9]+]], line: 81)
+; CHECK: [[META4]] = !DINamespace(name: "std", scope: null)
+; CHECK: [[META5]] = !DISubprogram(name: "memset", scope: [[META6:![0-9]+]], file: [[META6]], line: 61, type: [[META7:![0-9]+]], flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+; CHECK: [[META6]] = !DIFile(filename: "/usr/include/string.h", directory: {{.*}})
+; CHECK: [[META7]] = !DISubroutineType(types: [[META8:![0-9]+]])
+; CHECK: [[META8]] = !{[[META9:![0-9]+]], [[META9]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+; CHECK: [[META9]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
+; CHECK: [[META10]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+; CHECK: [[META11]] = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: [[META12:![0-9]+]], line: 13, baseType: [[META13:![0-9]+]])
+; CHECK: [[META12]] = !DIFile(filename: "build_upstream/lib/clang/18/include/__stddef_size_t.h", directory: {{.*}})
+; CHECK: [[META13]] = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned)
+; CHECK: [[META14]] = !DIFile(filename: "/usr/lib64/gcc/x86_64-suse-linux/13/../../../../include/c++/13/cstring", directory: "")
+; CHECK: [[DBG23]] = distinct !DISubprogram(name: "overwrite_middle_local", linkageName: "_Z22overwrite_middle_localv", scope: [[META24:![0-9]+]], file: [[META24]], line: 3, type: [[META25:![0-9]+]], scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META27:![0-9]+]])
+; CHECK: [[META24]] = !DIFile(filename: "./debuginfo.cpp", directory: {{.*}})
+; CHECK: [[META25]] = !DISubroutineType(types: [[META26:![0-9]+]])
+; CHECK: [[META26]] = !{null}
+; CHECK: [[META27]] = !{[[META28]]}
+; CHECK: [[META28]] = !DILocalVariable(name: "blob", scope: [[DBG23]], file: [[META24]], line: 4, type: [[META29:![0-9]+]])
+; CHECK: [[META29]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META30:![0-9]+]], size: 8000, elements: [[META31:![0-9]+]])
+; CHECK: [[META30]] = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+; CHECK: [[META31]] = !{[[META32:![0-9]+]]}
+; CHECK: [[META32]] = !DISubrange(count: 1000)
+; CHECK: [[DIASSIGNID33]] = distinct !DIAssignID()
+; CHECK: [[DBG34]] = !DILocation(line: 0, scope: [[DBG23]])
+; CHECK: [[DIASSIGNID35]] = distinct !DIAssignID()
+; CHECK: [[DIASSIGNID36]] = distinct !DIAssignID()
+;.
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
new file mode 100644
index 00000000000000..8d72deefc635b2
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+
+; RUN: opt < %s -passes=dse -S | FileCheck %s
+
+define dso_local void @overwrite_middle(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @overwrite_middle(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+  ret void
+}
+
+define dso_local void @overwrite_middle2(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @overwrite_middle2(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(980) %arrayidx, i8 3, i64 980, i1 false)
+  ret void
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
+

>From 6f26953df74bb8a9c0d6d760870882af7db43670 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Thu, 14 Dec 2023 18:03:20 +0000
Subject: [PATCH 2/4] [DSE] Split memory intrinsics that are dead in the middle

DSE can already shorten intrinsics which have dead fronts or rears.
This patch enables DSE to split memory intrinsics that
are dead in the middle into `Front` and `Rear`:

```
  // __Front__                 ___Rear___
  // | ------------- Dead ------------- |
  //         | --- Killing --- |
```

Resolves #72113
---
 .../Scalar/DeadStoreElimination.cpp           | 96 +++++++++++++++++--
 .../test/DebugInfo/dse-split-memintrinsics.ll | 12 ++-
 .../DeadStoreElimination/dead-middle-split.ll |  8 +-
 3 files changed, 103 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 008dcc53fd44fc..57ff807745cba9 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
@@ -554,6 +555,80 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
   }
 }
 
+static bool tryToSplitMiddle(Instruction *DeadI,
+                             OverlapIntervalsTy &IntervalMap,
+                             int64_t &DeadStart, uint64_t &DeadSize,
+                             const TargetTransformInfo &TTI) {
+  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
+    return false;
+
+  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
+  int64_t KillingStart = OII->second;
+  uint64_t KillingSize = OII->first - KillingStart;
+
+  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
+
+  uint64_t Threshold = TTI.getMaxMemIntrinsicInlineSizeThreshold();
+
+  // __Front__                 ___Rear___
+  // | ------------- Dead ------------- |
+  //         | --- Killing --- |
+
+  if (KillingStart < DeadStart ||
+      uint64_t(KillingStart + KillingSize) > uint64_t(DeadStart + DeadSize))
+    return false;
+
+  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
+  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
+
+  // Front keeps DeadI's dest; Rear is aligned down, so it may overlap Killing.
+  uint64_t FrontSize = KillingStart - DeadStart;
+
+  int64_t RearStart =
+      alignDown(uint64_t(KillingStart + KillingSize), PrefAlign.value());
+  uint64_t RearSize = (DeadStart + DeadSize) - RearStart;
+
+  // Profitability check against TTI's max mem-intrinsic inline-size threshold:
+  // bail out if Front and Rear both exceed it, or if Dead is below it already.
+  // NOTE(review): presumably the threshold governs inline expansion - confirm.
+  if ((FrontSize > Threshold && RearSize > Threshold) || DeadSize < Threshold)
+    return false;
+
+  Value *DeadWriteLength = DeadIntrinsic->getLength();
+  Value *DeadDest = DeadIntrinsic->getRawDest();
+
+  LLVM_DEBUG(dbgs() << "DSE: Split and shortened partially dead store: ["
+                    << DeadStart << ", " << DeadSize + DeadStart
+                    << "]\nInto: Front: [" << DeadStart << ", "
+                    << DeadStart + FrontSize << "], Rear: [" << RearStart
+                    << ", " << RearStart + RearSize << "]\n"
+                    << "Killer: [" << KillingStart << ", "
+                    << KillingSize + KillingStart << "]\n");
+
+  // Shrink DeadI in place so that it becomes Front.
+  DeadIntrinsic->setLength(
+      ConstantInt::get(DeadWriteLength->getType(), FrontSize));
+  DeadIntrinsic->addDereferenceableParamAttr(0, FrontSize);
+
+  Value *Indices[1] = {ConstantInt::get(DeadWriteLength->getType(), RearStart)};
+  Instruction *RearDestGEP = GetElementPtrInst::CreateInBounds(
+      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, Indices, "",
+      DeadI);
+  auto *Rear = cast<AnyMemIntrinsic>(DeadIntrinsic->clone());
+  Rear->setDest(RearDestGEP);
+  Rear->setLength(ConstantInt::get(DeadWriteLength->getType(), RearSize));
+  Rear->insertAfter(RearDestGEP);
+  Rear->setDestAlignment(PrefAlign);
+  Rear->addDereferenceableParamAttr(0, RearSize);
+
+  shortenAssignment(DeadI, DeadDest, DeadStart * 8, DeadSize * 8, FrontSize * 8,
+                    true);
+
+  IntervalMap.erase(OII);
+  DeadSize = FrontSize;
+  return true;
+}
+
 static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
                          uint64_t &DeadSize, int64_t KillingStart,
                          uint64_t KillingSize, bool IsOverwriteEnd) {
@@ -821,6 +896,7 @@ struct DSEState {
   const TargetLibraryInfo &TLI;
   const DataLayout &DL;
   const LoopInfo &LI;
+  const TargetTransformInfo &TTI;
 
   // Whether the function contains any irreducible control flow, useful for
   // being accurately able to detect loops.
@@ -860,9 +936,10 @@ struct DSEState {
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
-           const LoopInfo &LI)
+           const LoopInfo &LI, const TargetTransformInfo &TTI)
       : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
-        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
+        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI),
+        TTI(TTI) {
     // Collect blocks with throwing instructions not modeled in MemorySSA and
     // alloc-like objects.
     unsigned PO = 0;
@@ -1978,7 +2055,7 @@ struct DSEState {
     return false;
   }
 
-  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
+  bool removePartiallyOverlappedIntrinsicStores(InstOverlapIntervalsTy &IOL) {
     bool Changed = false;
     for (auto OI : IOL) {
       Instruction *DeadI = OI.first;
@@ -1994,6 +2071,9 @@ struct DSEState {
       if (IntervalMap.empty())
         continue;
       Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+      if (IntervalMap.empty())
+        continue;
+      Changed |= tryToSplitMiddle(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
     }
     return Changed;
   }
@@ -2059,10 +2139,11 @@ struct DSEState {
 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                 DominatorTree &DT, PostDominatorTree &PDT,
                                 const TargetLibraryInfo &TLI,
-                                const LoopInfo &LI) {
+                                const LoopInfo &LI,
+                                const TargetTransformInfo &TTI) {
   bool MadeChange = false;
 
-  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI, TTI);
   // For each store:
   for (unsigned I = 0; I < State.MemDefs.size(); I++) {
     MemoryDef *KillingDef = State.MemDefs[I];
@@ -2226,7 +2307,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
 
   if (EnablePartialOverwriteTracking)
     for (auto &KV : State.IOLs)
-      MadeChange |= State.removePartiallyOverlappedStores(KV.second);
+      MadeChange |= State.removePartiallyOverlappedIntrinsicStores(KV.second);
 
   MadeChange |= State.eliminateRedundantStoresOfExistingValues();
   MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
@@ -2244,8 +2325,9 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
 
-  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI, TTI);
 
 #ifdef LLVM_ENABLE_STATS
   if (AreStatisticsEnabled())
diff --git a/llvm/test/DebugInfo/dse-split-memintrinsics.ll b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
index 1decd22add665b..bb586ec934ea6a 100644
--- a/llvm/test/DebugInfo/dse-split-memintrinsics.ll
+++ b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
@@ -10,11 +10,14 @@ define void @_Z22overwrite_middle_localv() !dbg !23 {
 ; CHECK-NEXT:    [[BLOB:%.*]] = alloca [1000 x i8], align 16, !DIAssignID [[DIASSIGNID33:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID33]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[BLOB]], i8 5, i64 1000, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 976
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(10) [[BLOB]], i8 5, i64 10, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(), metadata [[DIASSIGNID35]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7920), metadata [[META36:![0-9]+]], metadata ptr undef, metadata !DIExpression()), !dbg [[DBG34]]
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 10, !dbg [[DBG34]]
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID36:![0-9]+]]
-; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID36]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID37:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID37]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
 ; CHECK-NEXT:    call void @_Z3escPc(ptr noundef nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    ret void, !dbg [[DBG34]]
@@ -113,5 +116,6 @@ declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata,
 ; CHECK: [[DIASSIGNID33]] = distinct !DIAssignID()
 ; CHECK: [[DBG34]] = !DILocation(line: 0, scope: [[DBG23]])
 ; CHECK: [[DIASSIGNID35]] = distinct !DIAssignID()
-; CHECK: [[DIASSIGNID36]] = distinct !DIAssignID()
+; CHECK: [[META36]] = distinct !DIAssignID()
+; CHECK: [[DIASSIGNID37]] = distinct !DIAssignID()
 ;.
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
index 8d72deefc635b2..cf74b6013e2ae4 100644
--- a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -6,7 +6,9 @@ define dso_local void @overwrite_middle(ptr nocapture noundef writeonly %X) loca
 ; CHECK-LABEL: define dso_local void @overwrite_middle(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 976
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
 ; CHECK-NEXT:    ret void
@@ -22,7 +24,9 @@ define dso_local void @overwrite_middle2(ptr nocapture noundef writeonly %X) loc
 ; CHECK-LABEL: define dso_local void @overwrite_middle2(
 ; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 990
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) [[TMP0]], i8 5, i64 10, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) [[X]], i8 5, i64 10, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(980) [[ARRAYIDX]], i8 3, i64 980, i1 false)
 ; CHECK-NEXT:    ret void

>From 9fc2f5782d4c6da4d2361be6b15ae15dcbf56376 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 15 Dec 2023 13:30:17 +0000
Subject: [PATCH 3/4] Address review comments

- Remove unnecessary array
- Simplify debuginfo test
- Add tests to cover the profitability check
---
 .../Scalar/DeadStoreElimination.cpp           |  3 +-
 .../test/DebugInfo/dse-split-memintrinsics.ll |  8 ----
 .../DeadStoreElimination/dead-middle-split.ll | 47 +++++++++++++++----
 3 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 57ff807745cba9..6766cf6df61ed1 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -610,9 +610,8 @@ static bool tryToSplitMiddle(Instruction *DeadI,
       ConstantInt::get(DeadWriteLength->getType(), FrontSize));
   DeadIntrinsic->addDereferenceableParamAttr(0, FrontSize);
 
-  Value *Indices[1] = {ConstantInt::get(DeadWriteLength->getType(), RearStart)};
   Instruction *RearDestGEP = GetElementPtrInst::CreateInBounds(
-      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, Indices, "",
+      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, ConstantInt::get(DeadWriteLength->getType(), RearStart), "",
       DeadI);
   auto *Rear = cast<AnyMemIntrinsic>(DeadIntrinsic->clone());
   Rear->setDest(RearDestGEP);
diff --git a/llvm/test/DebugInfo/dse-split-memintrinsics.ll b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
index bb586ec934ea6a..f51f8ce0e6f805 100644
--- a/llvm/test/DebugInfo/dse-split-memintrinsics.ll
+++ b/llvm/test/DebugInfo/dse-split-memintrinsics.ll
@@ -9,7 +9,6 @@ define void @_Z22overwrite_middle_localv() !dbg !23 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BLOB:%.*]] = alloca [1000 x i8], align 16, !DIAssignID [[DIASSIGNID33:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28:![0-9]+]], metadata !DIExpression(), metadata [[DIASSIGNID33]], metadata ptr [[BLOB]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]]
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[BLOB]], i64 976
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(10) [[BLOB]], i8 5, i64 10, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID35]]
@@ -19,31 +18,24 @@ define void @_Z22overwrite_middle_localv() !dbg !23 {
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) [[ADD_PTR]], i8 3, i64 980, i1 false), !dbg [[DBG34]], !DIAssignID [[DIASSIGNID37:![0-9]+]]
 ; CHECK-NEXT:    call void @llvm.dbg.assign(metadata i1 undef, metadata [[META28]], metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata [[DIASSIGNID37]], metadata ptr [[ADD_PTR]], metadata !DIExpression()), !dbg [[DBG34]]
 ; CHECK-NEXT:    call void @_Z3escPc(ptr noundef nonnull [[BLOB]]), !dbg [[DBG34]]
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull [[BLOB]]), !dbg [[DBG34]]
 ; CHECK-NEXT:    ret void, !dbg [[DBG34]]
 ;
 entry:
   %blob = alloca [1000 x i8], align 16, !DIAssignID !33
   call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !33, metadata ptr %blob, metadata !DIExpression()), !dbg !34
-  call void @llvm.lifetime.start.p0(i64 1000, ptr nonnull %blob), !dbg !34
   call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(1000) %blob, i8 5, i64 1000, i1 false), !dbg !34, !DIAssignID !35
   call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(), metadata !35, metadata ptr %blob, metadata !DIExpression()), !dbg !34
   %add.ptr = getelementptr inbounds i8, ptr %blob, i64 10, !dbg !34
   call void @llvm.memset.p0.i64(ptr noundef nonnull align 2 dereferenceable(980) %add.ptr, i8 3, i64 980, i1 false), !dbg !34, !DIAssignID !36
   call void @llvm.dbg.assign(metadata i1 undef, metadata !28, metadata !DIExpression(DW_OP_LLVM_fragment, 80, 7840), metadata !36, metadata ptr %add.ptr, metadata !DIExpression()), !dbg !34
   call void @_Z3escPc(ptr noundef nonnull %blob), !dbg !34
-  call void @llvm.lifetime.end.p0(i64 1000, ptr nonnull %blob), !dbg !34
   ret void, !dbg !34
 }
 
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
-
 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 
 declare void @_Z3escPc(ptr noundef)
 
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
-
 declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
index cf74b6013e2ae4..8b20fa346bb661 100644
--- a/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/dead-middle-split.ll
@@ -2,9 +2,9 @@
 
 ; RUN: opt < %s -passes=dse -S | FileCheck %s
 
-define dso_local void @overwrite_middle(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
-; CHECK-LABEL: define dso_local void @overwrite_middle(
-; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+define void @overwrite_middle(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @overwrite_middle(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 976
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(24) [[TMP0]], i8 5, i64 24, i1 false)
@@ -20,9 +20,9 @@ entry:
   ret void
 }
 
-define dso_local void @overwrite_middle2(ptr nocapture noundef writeonly %X) local_unnamed_addr #0 {
-; CHECK-LABEL: define dso_local void @overwrite_middle2(
-; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) local_unnamed_addr {
+define void @overwrite_middle2(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @overwrite_middle2(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 990
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) [[TMP0]], i8 5, i64 10, i1 false)
@@ -38,6 +38,37 @@ entry:
   ret void
 }
 
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
-declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
+define void @front_and_rear_bigger_than_threshold(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @front_and_rear_bigger_than_threshold(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) [[X]], i8 5, i64 1000, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 65
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(800) [[ARRAYIDX]], i8 3, i64 800, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) %X, i8 5, i64 1000, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 65
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(800) %arrayidx, i8 3, i64 800, i1 false)
+  ret void
+}
+
+define void @dead_smaller_than_threshold(ptr nocapture noundef writeonly %X) {
+; CHECK-LABEL: define void @dead_smaller_than_threshold(
+; CHECK-SAME: ptr nocapture noundef writeonly [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(50) [[X]], i8 5, i64 50, i1 false)
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 10
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(45) [[ARRAYIDX]], i8 3, i64 25, i1 false)
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(50) %X, i8 5, i64 50, i1 false)
+  %arrayidx = getelementptr inbounds i8, ptr %X, i64 10
+  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(45) %arrayidx, i8 3, i64 25, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 

>From c9114b5cafdebc1a07d46da823568f101b12d34b Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Fri, 15 Dec 2023 13:44:10 +0000
Subject: [PATCH 4/4] Clang format patch

---
 llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 6766cf6df61ed1..e85bc1b3478a23 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -611,8 +611,8 @@ static bool tryToSplitMiddle(Instruction *DeadI,
   DeadIntrinsic->addDereferenceableParamAttr(0, FrontSize);
 
   Instruction *RearDestGEP = GetElementPtrInst::CreateInBounds(
-      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest, ConstantInt::get(DeadWriteLength->getType(), RearStart), "",
-      DeadI);
+      Type::getInt8Ty(DeadIntrinsic->getContext()), DeadDest,
+      ConstantInt::get(DeadWriteLength->getType(), RearStart), "", DeadI);
   auto *Rear = cast<AnyMemIntrinsic>(DeadIntrinsic->clone());
   Rear->setDest(RearDestGEP);
   Rear->setLength(ConstantInt::get(DeadWriteLength->getType(), RearSize));



More information about the llvm-commits mailing list