[llvm] goldsteinn/dse mem guards (PR #106425)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 28 10:52:29 PDT 2024


https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/106425

- **[DebugInfo] Regen `dse-after-memcpyopt-merge.ll` test; NFC**
- **[DSE] Don't shrink memory intrinsic if its liable to make codegen worse**


>From 216f7deada12a205212d0f838bc0c3aa54ea97e9 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 28 Aug 2024 10:43:48 -0700
Subject: [PATCH 1/2] [DebugInfo] Regen `dse-after-memcpyopt-merge.ll` test;
 NFC

---
 .../dse/dse-after-memcpyopt-merge.ll          | 76 ++++++++++++++++---
 1 file changed, 67 insertions(+), 9 deletions(-)

diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
index 2c26cb8c84c7bd..92e487f24ef288 100644
--- a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
+++ b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt %s -S -passes=dse -o - | FileCheck %s --implicit-check-not="call void @llvm.dbg"
 ; RUN: opt --try-experimental-debuginfo-iterators %s -S -passes=dse -o - | FileCheck %s --implicit-check-not="call void @llvm.dbg"
 
@@ -15,25 +16,40 @@
 ;; fragment of the shortened store.
 ;;
 ; CHECK: #dbg_assign({{.*}}, ptr %g, !DIExpression(),
-; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), ![[ID:[0-9]+]], ptr %arrayidx.i, !DIExpression(),
-; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID]], ptr %arrayidx3.i, !DIExpression(),
-; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[UniqueID1:[0-9]+]], ptr poison, !DIExpression(),
-; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), ![[UniqueID2:[0-9]+]], ptr poison, !DIExpression(),
-; CHECK: call void @llvm.memset{{.*}}, !DIAssignID ![[ID]]
 
-; CHECK-DAG: ![[ID]] = distinct !DIAssignID()
-; CHECK-DAG: ![[UniqueID1]] = distinct !DIAssignID()
-; CHECK-DAG: ![[UniqueID2]] = distinct !DIAssignID()
 
 %struct.v = type { [4 x float] }
 
 $_ZN1vC2Ef = comdat any
 
 define dso_local void @_Z1fv() local_unnamed_addr !dbg !7 {
+; CHECK-LABEL: define dso_local void @_Z1fv(
+; CHECK-SAME: ) local_unnamed_addr !dbg [[DBG8:![0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[G:%.*]] = alloca [[STRUCT_V:%.*]], align 4, !DIAssignID [[DIASSIGNID24:![0-9]+]]
+; CHECK-NEXT:      #dbg_assign(i1 poison, [[META12:![0-9]+]], !DIExpression(), [[DIASSIGNID24]], ptr [[G]], !DIExpression(), [[META25:![0-9]+]])
+; CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 2, !dbg [[DBG26:![0-9]+]]
+; CHECK-NEXT:      #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), [[META34:![0-9]+]], ptr [[ARRAYIDX_I]], !DIExpression(), [[META25]])
+; CHECK-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 1, !dbg [[DBG35:![0-9]+]]
+; CHECK-NEXT:      #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), [[META34]], ptr [[ARRAYIDX3_I]], !DIExpression(), [[META25]])
+; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 0, !dbg [[DBG36:![0-9]+]]
+; CHECK-NEXT:      #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), [[META37:![0-9]+]], ptr poison, !DIExpression(), [[META25]])
+; CHECK-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[DBG38:![0-9]+]]
+; CHECK-NEXT:      #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), [[META39:![0-9]+]], ptr poison, !DIExpression(), [[META25]])
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast ptr [[ARRAYIDX5_I]] to ptr, !dbg [[DBG40:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4, !dbg [[DBG41:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 8, i1 false), !dbg [[DBG41]], !DIAssignID [[META34]]
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[META25]]
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX7]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID42:![0-9]+]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 0, !dbg [[META25]]
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID43:![0-9]+]]
+; CHECK-NEXT:    call void @_Z3escP1v(ptr nonnull [[G]]), !dbg [[DBG40]]
+; CHECK-NEXT:    ret void, !dbg [[DBG44:![0-9]+]]
+;
 entry:
   %g = alloca %struct.v, align 4, !DIAssignID !23
   call void @llvm.dbg.assign(metadata i1 poison, metadata !11, metadata !DIExpression(), metadata !23, metadata ptr %g, metadata !DIExpression()), !dbg !24
-   %arrayidx.i = getelementptr inbounds %struct.v, ptr %g, i64 0, i32 0, i64 2, !dbg !37
+  %arrayidx.i = getelementptr inbounds %struct.v, ptr %g, i64 0, i32 0, i64 2, !dbg !37
   call void @llvm.dbg.assign(metadata float 0.000000e+00, metadata !11, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32), metadata !39, metadata ptr %arrayidx.i, metadata !DIExpression()), !dbg !24
   %arrayidx3.i = getelementptr inbounds %struct.v, ptr %g, i64 0, i32 0, i64 1, !dbg !40
   call void @llvm.dbg.assign(metadata float 0.000000e+00, metadata !11, metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata !39, metadata ptr %arrayidx3.i, metadata !DIExpression()), !dbg !24
@@ -125,3 +141,45 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 !65 = !DISubroutineType(types: !66)
 !66 = !{null, !30}
 !1000 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
+; CHECK: [[META1]] = !DIFile(filename: "reduce.cpp", directory: {{.*}})
+; CHECK: [[META2]] = !{}
+; CHECK: [[DBG8]] = distinct !DISubprogram(name: "f", linkageName: "_Z1fv", scope: [[META1]], file: [[META1]], line: 12, type: [[META9:![0-9]+]], scopeLine: 12, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META11:![0-9]+]])
+; CHECK: [[META9]] = !DISubroutineType(types: [[META10:![0-9]+]])
+; CHECK: [[META10]] = !{null}
+; CHECK: [[META11]] = !{[[META12]]}
+; CHECK: [[META12]] = !DILocalVariable(name: "g", scope: [[DBG8]], file: [[META1]], line: 13, type: [[META13:![0-9]+]])
+; CHECK: [[META13]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "v", file: [[META1]], line: 1, size: 128, flags: DIFlagTypePassByValue | DIFlagNonTrivial, elements: [[META14:![0-9]+]], identifier: "_ZTS1v")
+; CHECK: [[META14]] = !{[[META15:![0-9]+]], [[META20:![0-9]+]]}
+; CHECK: [[META15]] = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: [[META13]], file: [[META1]], line: 2, baseType: [[META16:![0-9]+]], size: 128)
+; CHECK: [[META16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META17:![0-9]+]], size: 128, elements: [[META18:![0-9]+]])
+; CHECK: [[META17]] = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
+; CHECK: [[META18]] = !{[[META19:![0-9]+]]}
+; CHECK: [[META19]] = !DISubrange(count: 4)
+; CHECK: [[META20]] = !DISubprogram(name: "v", scope: [[META13]], file: [[META1]], line: 4, type: [[META21:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+; CHECK: [[META21]] = !DISubroutineType(types: [[META22:![0-9]+]])
+; CHECK: [[META22]] = !{null, [[META23:![0-9]+]], [[META17]]}
+; CHECK: [[META23]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META13]], size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+; CHECK: [[DIASSIGNID24]] = distinct !DIAssignID()
+; CHECK: [[META25]] = !DILocation(line: 0, scope: [[DBG8]])
+; CHECK: [[DBG26]] = !DILocation(line: 5, column: 19, scope: [[META27:![0-9]+]], inlinedAt: [[META33:![0-9]+]])
+; CHECK: [[META27]] = distinct !DILexicalBlock(scope: [[META28:![0-9]+]], file: [[META1]], line: 4, column: 14)
+; CHECK: [[META28]] = distinct !DISubprogram(name: "v", linkageName: "_ZN1vC2Ef", scope: [[META13]], file: [[META1]], line: 4, type: [[META21]], scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], declaration: [[META20]], retainedNodes: [[META29:![0-9]+]])
+; CHECK: [[META29]] = !{[[META30:![0-9]+]], [[META32:![0-9]+]]}
+; CHECK: [[META30]] = !DILocalVariable(name: "this", arg: 1, scope: [[META28]], type: [[META31:![0-9]+]], flags: DIFlagArtificial | DIFlagObjectPointer)
+; CHECK: [[META31]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META13]], size: 64)
+; CHECK: [[META32]] = !DILocalVariable(name: "d", arg: 2, scope: [[META28]], file: [[META1]], line: 4, type: [[META17]])
+; CHECK: [[META33]] = distinct !DILocation(line: 13, column: 5, scope: [[DBG8]])
+; CHECK: [[META34]] = distinct !DIAssignID()
+; CHECK: [[DBG35]] = !DILocation(line: 5, column: 12, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[DBG36]] = !DILocation(line: 5, column: 5, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[META37]] = distinct !DIAssignID()
+; CHECK: [[DBG38]] = !DILocation(line: 6, column: 5, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[META39]] = distinct !DIAssignID()
+; CHECK: [[DBG40]] = !DILocation(line: 14, column: 3, scope: [[DBG8]])
+; CHECK: [[DBG41]] = !DILocation(line: 5, column: 17, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[DIASSIGNID42]] = distinct !DIAssignID()
+; CHECK: [[DIASSIGNID43]] = distinct !DIAssignID()
+; CHECK: [[DBG44]] = !DILocation(line: 15, column: 1, scope: [[DBG8]])
+;.

>From 302327851fdf08cbd4044a2ff164b85dd1d06a91 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 28 Aug 2024 10:37:46 -0700
Subject: [PATCH 2/2] [DSE] Don't shrink memory intrinsic if its liable to make
 codegen worse

Currently for the following snippet:
    `memcpy(dst, src, 8); dst[7] = 0`
DSE will transform it to:
    `memcpy(dst, src, 7); dst[7] = 0`

However, its typically better to emit this with a full 8 byte `memcpy`
followed by an overwrite of byte 7 as opposed to truncate the `memcpy`
to 7 `memcpy`.

This is because a 8-byte memcpy can be lowered with a single
load/store, whereas a 7 byte memcpy needs 2x load/stores.

Similiar is true for `memset`.

This patch changes the behavior to check if we are liable to be
pessimizing codegen when shrinking `memcpy` to avoid the above and
other similiar cases.
---
 .../Scalar/DeadStoreElimination.cpp           | 47 ++++++++++++++-----
 .../dse/dse-after-memcpyopt-merge.ll          | 31 +++++-------
 .../OverwriteStoreBegin.ll                    | 22 ++++-----
 .../DeadStoreElimination/OverwriteStoreEnd.ll | 12 ++---
 4 files changed, 61 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 992139a95a43d3..9dd1822b3457f6 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
@@ -560,7 +561,8 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
 
 static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
                          uint64_t &DeadSize, int64_t KillingStart,
-                         uint64_t KillingSize, bool IsOverwriteEnd) {
+                         uint64_t KillingSize, bool IsOverwriteEnd,
+                         const TargetTransformInfo &TTI) {
   auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
   Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
 
@@ -612,6 +614,24 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
   assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
 
   uint64_t NewSize = DeadSize - ToRemoveSize;
+
+  // Check that we aren't going to pessimize codegen by lowering the length. I.e
+  // a memcpy(dst, src, 8) is more efficient than memcpy(dst, src, 7).
+  // These checks are relatively conservative. We bail out if:
+  // 1) We are removing less than 1 store (measured by targets load/store Vec
+  //    width).
+  // 2) We are saving a load/store (assuming loads/stores occur per pow2 block)
+  // 3) We aren't preventing this from going below inline thresh
+  // 4) We are shrinking by less than half of the initial size.
+  uint64_t PrefVecWidth =
+      TTI.getLoadStoreVecRegBitWidth(DeadIntrinsic->getDestAddressSpace()) / 8U;
+  uint64_t InlineThresh = TTI.getMaxMemIntrinsicInlineSizeThreshold();
+  if (ToRemoveSize < PrefVecWidth &&
+      popcount(DeadSize) < popcount(DeadSize - ToRemoveSize) &&
+      (DeadSize <= InlineThresh) == (DeadSize - ToRemoveSize <= InlineThresh) &&
+      ToRemoveSize < DeadSize / 2U)
+    return false;
+
   if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
     // When shortening an atomic memory intrinsic, the newly shortened
     // length must remain an integer multiple of the element size.
@@ -654,7 +674,8 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
 }
 
 static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
-                            int64_t &DeadStart, uint64_t &DeadSize) {
+                            int64_t &DeadStart, uint64_t &DeadSize,
+                            const TargetTransformInfo &TTI) {
   if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
     return false;
 
@@ -672,7 +693,7 @@ static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
       // be non negative due to preceding checks.
       KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
     if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
-                     true)) {
+                     true, TTI)) {
       IntervalMap.erase(OII);
       return true;
     }
@@ -682,7 +703,8 @@ static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
 
 static bool tryToShortenBegin(Instruction *DeadI,
                               OverlapIntervalsTy &IntervalMap,
-                              int64_t &DeadStart, uint64_t &DeadSize) {
+                              int64_t &DeadStart, uint64_t &DeadSize,
+                              const TargetTransformInfo &TTI) {
   if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
     return false;
 
@@ -701,7 +723,7 @@ static bool tryToShortenBegin(Instruction *DeadI,
     assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
            "Should have been handled as OW_Complete");
     if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
-                     false)) {
+                     false, TTI)) {
       IntervalMap.erase(OII);
       return true;
     }
@@ -824,6 +846,7 @@ struct DSEState {
   DominatorTree &DT;
   PostDominatorTree &PDT;
   const TargetLibraryInfo &TLI;
+  const TargetTransformInfo &TTI;
   const DataLayout &DL;
   const LoopInfo &LI;
 
@@ -868,9 +891,9 @@ struct DSEState {
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
-           const LoopInfo &LI)
+           const TargetTransformInfo &TTI, const LoopInfo &LI)
       : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
-        PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
+        PDT(PDT), TLI(TLI), TTI(TTI), DL(F.getDataLayout()), LI(LI) {
     // Collect blocks with throwing instructions not modeled in MemorySSA and
     // alloc-like objects.
     unsigned PO = 0;
@@ -2066,10 +2089,10 @@ struct DSEState {
       uint64_t DeadSize = Loc.Size.getValue();
       GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
       OverlapIntervalsTy &IntervalMap = OI.second;
-      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
+      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
       if (IntervalMap.empty())
         continue;
-      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
     }
     return Changed;
   }
@@ -2137,10 +2160,11 @@ struct DSEState {
 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                 DominatorTree &DT, PostDominatorTree &PDT,
                                 const TargetLibraryInfo &TLI,
+                                const TargetTransformInfo &TTI,
                                 const LoopInfo &LI) {
   bool MadeChange = false;
 
-  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+  DSEState State(F, AA, MSSA, DT, PDT, TLI, TTI, LI);
   // For each store:
   for (unsigned I = 0; I < State.MemDefs.size(); I++) {
     MemoryDef *KillingDef = State.MemDefs[I];
@@ -2332,12 +2356,13 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
   AliasAnalysis &AA = AM.getResult<AAManager>(F);
   const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
 
-  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, TTI, LI);
 
 #ifdef LLVM_ENABLE_STATS
   if (AreStatisticsEnabled())
diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
index 92e487f24ef288..afb30e07889ee9 100644
--- a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
+++ b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
@@ -33,18 +33,13 @@ define dso_local void @_Z1fv() local_unnamed_addr !dbg !7 {
 ; CHECK-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 1, !dbg [[DBG35:![0-9]+]]
 ; CHECK-NEXT:      #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), [[META34]], ptr [[ARRAYIDX3_I]], !DIExpression(), [[META25]])
 ; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 0, !dbg [[DBG36:![0-9]+]]
-; CHECK-NEXT:      #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), [[META37:![0-9]+]], ptr poison, !DIExpression(), [[META25]])
-; CHECK-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[DBG38:![0-9]+]]
-; CHECK-NEXT:      #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), [[META39:![0-9]+]], ptr poison, !DIExpression(), [[META25]])
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast ptr [[ARRAYIDX5_I]] to ptr, !dbg [[DBG40:![0-9]+]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4, !dbg [[DBG41:![0-9]+]]
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 8, i1 false), !dbg [[DBG41]], !DIAssignID [[META34]]
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[META25]]
-; CHECK-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX7]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID42:![0-9]+]]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 0, !dbg [[META25]]
-; CHECK-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID43:![0-9]+]]
-; CHECK-NEXT:    call void @_Z3escP1v(ptr nonnull [[G]]), !dbg [[DBG40]]
-; CHECK-NEXT:    ret void, !dbg [[DBG44:![0-9]+]]
+; CHECK-NEXT:      #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), [[META34]], ptr [[ARRAYIDX5_I]], !DIExpression(), [[META25]])
+; CHECK-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[DBG37:![0-9]+]]
+; CHECK-NEXT:      #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), [[META34]], ptr [[ARRAYIDX7_I]], !DIExpression(), [[META25]])
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast ptr [[ARRAYIDX5_I]] to ptr, !dbg [[DBG38:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 16, i1 false), !dbg [[DBG39:![0-9]+]], !DIAssignID [[META34]]
+; CHECK-NEXT:    call void @_Z3escP1v(ptr nonnull [[G]]), !dbg [[DBG38]]
+; CHECK-NEXT:    ret void, !dbg [[DBG40:![0-9]+]]
 ;
 entry:
   %g = alloca %struct.v, align 4, !DIAssignID !23
@@ -174,12 +169,8 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 ; CHECK: [[META34]] = distinct !DIAssignID()
 ; CHECK: [[DBG35]] = !DILocation(line: 5, column: 12, scope: [[META27]], inlinedAt: [[META33]])
 ; CHECK: [[DBG36]] = !DILocation(line: 5, column: 5, scope: [[META27]], inlinedAt: [[META33]])
-; CHECK: [[META37]] = distinct !DIAssignID()
-; CHECK: [[DBG38]] = !DILocation(line: 6, column: 5, scope: [[META27]], inlinedAt: [[META33]])
-; CHECK: [[META39]] = distinct !DIAssignID()
-; CHECK: [[DBG40]] = !DILocation(line: 14, column: 3, scope: [[DBG8]])
-; CHECK: [[DBG41]] = !DILocation(line: 5, column: 17, scope: [[META27]], inlinedAt: [[META33]])
-; CHECK: [[DIASSIGNID42]] = distinct !DIAssignID()
-; CHECK: [[DIASSIGNID43]] = distinct !DIAssignID()
-; CHECK: [[DBG44]] = !DILocation(line: 15, column: 1, scope: [[DBG8]])
+; CHECK: [[DBG37]] = !DILocation(line: 6, column: 5, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[DBG38]] = !DILocation(line: 14, column: 3, scope: [[DBG8]])
+; CHECK: [[DBG39]] = !DILocation(line: 5, column: 17, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[DBG40]] = !DILocation(line: 15, column: 1, scope: [[DBG8]])
 ;.
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
index bc1756f6ca9d1b..893084df00f58e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
@@ -23,8 +23,8 @@ define void @write4to7_weird_element_type(ptr nocapture %p) {
 ; CHECK-LABEL: @write4to7_weird_element_type(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
 ; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    ret void
@@ -119,8 +119,7 @@ entry:
 define void @write0to7(ptr nocapture %p) {
 ; CHECK-LABEL: @write0to7(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    store i64 1, ptr [[P]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -135,8 +134,7 @@ entry:
 define void @write0to7_atomic(ptr nocapture %p) {
 ; CHECK-LABEL: @write0to7_atomic(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8
-; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i32 4)
 ; CHECK-NEXT:    store atomic i64 1, ptr [[P]] unordered, align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -236,8 +234,7 @@ define void @write2to10(ptr nocapture %p) {
 ; CHECK-LABEL: @write2to10(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ARRAYIDX0]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
 ; CHECK-NEXT:    store i64 1, ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    ret void
@@ -254,8 +251,7 @@ define void @write2to10_atomic(ptr nocapture %p) {
 ; CHECK-LABEL: @write2to10_atomic(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[ARRAYIDX0]], i8 0, i64 32, i32 4)
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
 ; CHECK-NEXT:    store atomic i64 1, ptr [[ARRAYIDX2]] unordered, align 8
 ; CHECK-NEXT:    ret void
@@ -360,8 +356,7 @@ define void @ow_begin_align1(ptr nocapture %p) {
 ; CHECK-LABEL: @ow_begin_align1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 7
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[TMP0]], i8 0, i64 25, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    store i64 1, ptr [[P]], align 1
 ; CHECK-NEXT:    ret void
 ;
@@ -376,8 +371,7 @@ define void @ow_end_align4(ptr nocapture %p) {
 ; CHECK-LABEL: @ow_end_align4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    store i64 1, ptr [[P]], align 1
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
index ac8eee7088ad8f..dc360a94662913 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -63,7 +63,7 @@ entry:
 define void @write28to32(ptr nocapture %p) nounwind uwtable ssp {
 ; CHECK-LABEL: @write28to32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
 ; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    ret void
@@ -78,7 +78,7 @@ entry:
 define void @write28to32_atomic(ptr nocapture %p) nounwind uwtable ssp {
 ; CHECK-LABEL: @write28to32_atomic(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i32 4)
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
 ; CHECK-NEXT:    store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
 ; CHECK-NEXT:    ret void
@@ -259,8 +259,8 @@ define void @write16To23AndThen24To31(ptr nocapture %P, i64 %n64, i32 %n32, i16
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 16, i1 false)
 ; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
 ; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
-; CHECK-NEXT:    store i64 3, ptr [[BASE64_2]]
-; CHECK-NEXT:    store i64 3, ptr [[BASE64_3]]
+; CHECK-NEXT:    store i64 3, ptr [[BASE64_2]], align 8
+; CHECK-NEXT:    store i64 3, ptr [[BASE64_3]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -345,7 +345,7 @@ define void @ow_end_align1(ptr nocapture %p) {
 ; CHECK-LABEL: @ow_end_align1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 27, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
 ; CHECK-NEXT:    store i64 1, ptr [[P2]], align 1
 ; CHECK-NEXT:    ret void
@@ -362,7 +362,7 @@ define void @ow_end_align4(ptr nocapture %p) {
 ; CHECK-LABEL: @ow_end_align4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
 ; CHECK-NEXT:    store i64 1, ptr [[P2]], align 1
 ; CHECK-NEXT:    ret void



More information about the llvm-commits mailing list