[llvm] [DSE] Optimizing shrinking of memory intrinsic (PR #106425)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 11:23:43 PDT 2024
https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/106425
>From a70a8d1cd1fdb8446e17414f65720292e58fa9ff Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 28 Aug 2024 10:43:48 -0700
Subject: [PATCH 1/3] [DebugInfo] Regen `dse-after-memcpyopt-merge.ll` test;
NFC
---
.../dse/dse-after-memcpyopt-merge.ll | 77 ++++++++++++++++---
1 file changed, 67 insertions(+), 10 deletions(-)
diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
index 2c26cb8c84c7bd..74ac4d763e5e5b 100644
--- a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
+++ b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt %s -S -passes=dse -o - | FileCheck %s --implicit-check-not="call void @llvm.dbg"
; RUN: opt --try-experimental-debuginfo-iterators %s -S -passes=dse -o - | FileCheck %s --implicit-check-not="call void @llvm.dbg"
@@ -14,26 +15,40 @@
;; Check that there's an unlinked dbg.assign inserted after each overlapping
;; fragment of the shortened store.
;;
-; CHECK: #dbg_assign({{.*}}, ptr %g, !DIExpression(),
-; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), ![[ID:[0-9]+]], ptr %arrayidx.i, !DIExpression(),
-; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID]], ptr %arrayidx3.i, !DIExpression(),
-; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[UniqueID1:[0-9]+]], ptr poison, !DIExpression(),
-; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), ![[UniqueID2:[0-9]+]], ptr poison, !DIExpression(),
-; CHECK: call void @llvm.memset{{.*}}, !DIAssignID ![[ID]]
-; CHECK-DAG: ![[ID]] = distinct !DIAssignID()
-; CHECK-DAG: ![[UniqueID1]] = distinct !DIAssignID()
-; CHECK-DAG: ![[UniqueID2]] = distinct !DIAssignID()
%struct.v = type { [4 x float] }
$_ZN1vC2Ef = comdat any
define dso_local void @_Z1fv() local_unnamed_addr !dbg !7 {
+; CHECK-LABEL: define dso_local void @_Z1fv(
+; CHECK-SAME: ) local_unnamed_addr !dbg [[DBG8:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[G:%.*]] = alloca [[STRUCT_V:%.*]], align 4, !DIAssignID [[DIASSIGNID24:![0-9]+]]
+; CHECK-NEXT: #dbg_assign(i1 poison, [[META12:![0-9]+]], !DIExpression(), [[DIASSIGNID24]], ptr [[G]], !DIExpression(), [[META25:![0-9]+]])
+; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 2, !dbg [[DBG26:![0-9]+]]
+; CHECK-NEXT: #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), [[META34:![0-9]+]], ptr [[ARRAYIDX_I]], !DIExpression(), [[META25]])
+; CHECK-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 1, !dbg [[DBG35:![0-9]+]]
+; CHECK-NEXT: #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), [[META34]], ptr [[ARRAYIDX3_I]], !DIExpression(), [[META25]])
+; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 0, !dbg [[DBG36:![0-9]+]]
+; CHECK-NEXT: #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), [[META37:![0-9]+]], ptr poison, !DIExpression(), [[META25]])
+; CHECK-NEXT: [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[DBG38:![0-9]+]]
+; CHECK-NEXT: #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), [[META39:![0-9]+]], ptr poison, !DIExpression(), [[META25]])
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast ptr [[ARRAYIDX5_I]] to ptr, !dbg [[DBG40:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4, !dbg [[DBG41:![0-9]+]]
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 8, i1 false), !dbg [[DBG41]], !DIAssignID [[META34]]
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[META25]]
+; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX7]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID42:![0-9]+]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 0, !dbg [[META25]]
+; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID43:![0-9]+]]
+; CHECK-NEXT: call void @_Z3escP1v(ptr nonnull [[G]]), !dbg [[DBG40]]
+; CHECK-NEXT: ret void, !dbg [[DBG44:![0-9]+]]
+;
entry:
%g = alloca %struct.v, align 4, !DIAssignID !23
call void @llvm.dbg.assign(metadata i1 poison, metadata !11, metadata !DIExpression(), metadata !23, metadata ptr %g, metadata !DIExpression()), !dbg !24
- %arrayidx.i = getelementptr inbounds %struct.v, ptr %g, i64 0, i32 0, i64 2, !dbg !37
+ %arrayidx.i = getelementptr inbounds %struct.v, ptr %g, i64 0, i32 0, i64 2, !dbg !37
call void @llvm.dbg.assign(metadata float 0.000000e+00, metadata !11, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32), metadata !39, metadata ptr %arrayidx.i, metadata !DIExpression()), !dbg !24
%arrayidx3.i = getelementptr inbounds %struct.v, ptr %g, i64 0, i32 0, i64 1, !dbg !40
call void @llvm.dbg.assign(metadata float 0.000000e+00, metadata !11, metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata !39, metadata ptr %arrayidx3.i, metadata !DIExpression()), !dbg !24
@@ -125,3 +140,45 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
!65 = !DISubroutineType(types: !66)
!66 = !{null, !30}
!1000 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
+; CHECK: [[META1]] = !DIFile(filename: "reduce.cpp", directory: {{.*}})
+; CHECK: [[META2]] = !{}
+; CHECK: [[DBG8]] = distinct !DISubprogram(name: "f", linkageName: "_Z1fv", scope: [[META1]], file: [[META1]], line: 12, type: [[META9:![0-9]+]], scopeLine: 12, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META11:![0-9]+]])
+; CHECK: [[META9]] = !DISubroutineType(types: [[META10:![0-9]+]])
+; CHECK: [[META10]] = !{null}
+; CHECK: [[META11]] = !{[[META12]]}
+; CHECK: [[META12]] = !DILocalVariable(name: "g", scope: [[DBG8]], file: [[META1]], line: 13, type: [[META13:![0-9]+]])
+; CHECK: [[META13]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "v", file: [[META1]], line: 1, size: 128, flags: DIFlagTypePassByValue | DIFlagNonTrivial, elements: [[META14:![0-9]+]], identifier: "_ZTS1v")
+; CHECK: [[META14]] = !{[[META15:![0-9]+]], [[META20:![0-9]+]]}
+; CHECK: [[META15]] = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: [[META13]], file: [[META1]], line: 2, baseType: [[META16:![0-9]+]], size: 128)
+; CHECK: [[META16]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META17:![0-9]+]], size: 128, elements: [[META18:![0-9]+]])
+; CHECK: [[META17]] = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
+; CHECK: [[META18]] = !{[[META19:![0-9]+]]}
+; CHECK: [[META19]] = !DISubrange(count: 4)
+; CHECK: [[META20]] = !DISubprogram(name: "v", scope: [[META13]], file: [[META1]], line: 4, type: [[META21:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+; CHECK: [[META21]] = !DISubroutineType(types: [[META22:![0-9]+]])
+; CHECK: [[META22]] = !{null, [[META23:![0-9]+]], [[META17]]}
+; CHECK: [[META23]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META13]], size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+; CHECK: [[DIASSIGNID24]] = distinct !DIAssignID()
+; CHECK: [[META25]] = !DILocation(line: 0, scope: [[DBG8]])
+; CHECK: [[DBG26]] = !DILocation(line: 5, column: 19, scope: [[META27:![0-9]+]], inlinedAt: [[META33:![0-9]+]])
+; CHECK: [[META27]] = distinct !DILexicalBlock(scope: [[META28:![0-9]+]], file: [[META1]], line: 4, column: 14)
+; CHECK: [[META28]] = distinct !DISubprogram(name: "v", linkageName: "_ZN1vC2Ef", scope: [[META13]], file: [[META1]], line: 4, type: [[META21]], scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], declaration: [[META20]], retainedNodes: [[META29:![0-9]+]])
+; CHECK: [[META29]] = !{[[META30:![0-9]+]], [[META32:![0-9]+]]}
+; CHECK: [[META30]] = !DILocalVariable(name: "this", arg: 1, scope: [[META28]], type: [[META31:![0-9]+]], flags: DIFlagArtificial | DIFlagObjectPointer)
+; CHECK: [[META31]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META13]], size: 64)
+; CHECK: [[META32]] = !DILocalVariable(name: "d", arg: 2, scope: [[META28]], file: [[META1]], line: 4, type: [[META17]])
+; CHECK: [[META33]] = distinct !DILocation(line: 13, column: 5, scope: [[DBG8]])
+; CHECK: [[META34]] = distinct !DIAssignID()
+; CHECK: [[DBG35]] = !DILocation(line: 5, column: 12, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[DBG36]] = !DILocation(line: 5, column: 5, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[META37]] = distinct !DIAssignID()
+; CHECK: [[DBG38]] = !DILocation(line: 6, column: 5, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[META39]] = distinct !DIAssignID()
+; CHECK: [[DBG40]] = !DILocation(line: 14, column: 3, scope: [[DBG8]])
+; CHECK: [[DBG41]] = !DILocation(line: 5, column: 17, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[DIASSIGNID42]] = distinct !DIAssignID()
+; CHECK: [[DIASSIGNID43]] = distinct !DIAssignID()
+; CHECK: [[DBG44]] = !DILocation(line: 15, column: 1, scope: [[DBG8]])
+;.
>From 51c320df7210d7359bcb0a90b4f49da6aa59a33d Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Thu, 29 Aug 2024 11:03:47 -0700
Subject: [PATCH 2/3] [DSE] Add more tests for optimizing shrinkage of
memset/memcpy; NFC
---
.../OverwriteStoreBegin.ll | 160 ++++++++++++++++--
.../DeadStoreElimination/OverwriteStoreEnd.ll | 98 ++++++++++-
2 files changed, 241 insertions(+), 17 deletions(-)
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
index bc1756f6ca9d1b..6a257792db833c 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=dse -S | FileCheck %s
+; RUN: opt < %s -passes=dse -S | FileCheck %s --check-prefixes=CHECK,CHECK-MEM4
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -passes=dse -S | FileCheck %s --check-prefixes=CHECK,CHECK-MEM16
define void @write4to7(ptr nocapture %p) {
; CHECK-LABEL: @write4to7(
@@ -23,8 +24,8 @@ define void @write4to7_weird_element_type(ptr nocapture %p) {
; CHECK-LABEL: @write4to7_weird_element_type(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: ret void
@@ -269,14 +270,23 @@ entry:
}
define void @write8To15AndThen0To7(ptr nocapture %P) {
-; CHECK-LABEL: @write8To15AndThen0To7(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
-; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i1 false)
-; CHECK-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
-; CHECK-NEXT: store i64 1, ptr [[BASE64_1]], align 4
-; CHECK-NEXT: store i64 2, ptr [[P]], align 4
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @write8To15AndThen0To7(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
+; CHECK-MEM4-NEXT: tail call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i1 false)
+; CHECK-MEM4-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
+; CHECK-MEM4-NEXT: store i64 1, ptr [[BASE64_1]], align 4
+; CHECK-MEM4-NEXT: store i64 2, ptr [[P]], align 4
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @write8To15AndThen0To7(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
+; CHECK-MEM16-NEXT: tail call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i1 false)
+; CHECK-MEM16-NEXT: [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
+; CHECK-MEM16-NEXT: store i64 1, ptr [[BASE64_1]], align 8
+; CHECK-MEM16-NEXT: store i64 2, ptr [[P]], align 8
+; CHECK-MEM16-NEXT: ret void
;
entry:
@@ -402,3 +412,131 @@ entry:
store i64 1, ptr %p, align 1
ret void
}
+
+define void @memset_optimize_size_lo_33_to_x86_32_generic_28(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_lo_33_to_x86_32_generic_28(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 3
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 5
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 28, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 3
+ %p1 = getelementptr inbounds i8, ptr %p, i64 0
+ call void @llvm.memset.p0.i64(ptr align 1 %p0, i8 0, i64 33, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_lo_33_misaligned_x86_fail_generic_save_unit(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_lo_33_misaligned_x86_fail_generic_save_unit(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 3
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 29, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 3
+ %p1 = getelementptr inbounds i8, ptr %p, i64 0
+ call void @llvm.memset.p0.i64(ptr align 2 %p0, i8 0, i64 33, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_lo_32_x86_misaligned_fail_generic_save_unit2(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_lo_32_x86_misaligned_fail_generic_save_unit2(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 28, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 4
+ %p1 = getelementptr inbounds i8, ptr %p, i64 0
+ call void @llvm.memset.p0.i64(ptr align 2 %p0, i8 0, i64 32, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_lo_34_to_32(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_lo_34_to_32(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 30, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 4
+ %p1 = getelementptr inbounds i8, ptr %p, i64 0
+ call void @llvm.memset.p0.i64(ptr align 2 %p0, i8 0, i64 34, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_lo_34_x86_misaligned_fail_generic_save_unit(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_lo_34_x86_misaligned_fail_generic_save_unit(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 30, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 4
+ %p1 = getelementptr inbounds i8, ptr %p, i64 0
+ call void @llvm.memset.p0.i64(ptr align 4 %p0, i8 0, i64 34, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_lo_34_to_32_no_align_okay(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_lo_34_to_32_no_align_okay(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 30, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 4
+ %p1 = getelementptr inbounds i8, ptr %p, i64 0
+ call void @llvm.memset.p0.i64(ptr align 1 %p0, i8 0, i64 34, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_lo_33_to_31_save_unit_no_change(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_lo_33_to_31_save_unit_no_change(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 2
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 31, i1 false)
+; CHECK-NEXT: store i32 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 1
+ %p1 = getelementptr inbounds i8, ptr %p, i64 0
+ call void @llvm.memset.p0.i64(ptr align 2 %p0, i8 0, i64 33, i1 false)
+ store i32 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_lo_36_to_32(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_lo_36_to_32(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 32, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 1
+ %p1 = getelementptr inbounds i8, ptr %p, i64 0
+ call void @llvm.memset.p0.i64(ptr align 4 %p0, i8 0, i64 36, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
index ac8eee7088ad8f..9fc3f827e614db 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -1,9 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=dse -S | FileCheck %s
+; RUN: opt < %s -passes=dse -S | FileCheck %s --check-prefixes=CHECK,CHECK-MEM4
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -passes=dse -S | FileCheck %s --check-prefixes=CHECK,CHECK-MEM16
+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-%struct.vec2 = type { <4 x i32>, <4 x i32> }
-%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
+%struct.vec2 = type {
+<4 x i32>, <4 x i32>
+}
+
+%struct.vec2plusi = type {
+<4 x i32>, <4 x i32>, i32
+}
@glob1 = global %struct.vec2 zeroinitializer, align 16
@glob2 = global %struct.vec2plusi zeroinitializer, align 16
@@ -231,7 +238,9 @@ declare void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr nocapture, ptr
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.element.unordered.atomic.p0.i64(ptr nocapture, i8, i64, i32) nounwind
-%struct.trapframe = type { i64, i64, i64 }
+%struct.trapframe = type {
+i64, i64, i64
+}
; bugzilla 11455 - make sure negative GEP's don't break this optimisation
define void @cpu_lwp_fork(ptr %md_regs, i64 %pcb_rsp0) nounwind uwtable noinline ssp {
@@ -259,8 +268,8 @@ define void @write16To23AndThen24To31(ptr nocapture %P, i64 %n64, i32 %n32, i16
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT: [[BASE64_2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
; CHECK-NEXT: [[BASE64_3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
-; CHECK-NEXT: store i64 3, ptr [[BASE64_2]]
-; CHECK-NEXT: store i64 3, ptr [[BASE64_3]]
+; CHECK-NEXT: store i64 3, ptr [[BASE64_2]], align 8
+; CHECK-NEXT: store i64 3, ptr [[BASE64_3]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -392,3 +401,80 @@ entry:
ret void
}
+define void @memset_optimize_size_hi_31_to_24(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_hi_31_to_24(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 23
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P0]], i8 0, i64 23, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 0
+ %p1 = getelementptr inbounds i8, ptr %p, i64 23
+ call void @llvm.memset.p0.i64(ptr align 1 %p0, i8 0, i64 31, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_hi_32_no_change_x86_change_generic(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_hi_32_no_change_x86_change_generic(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 28
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P0]], i8 0, i64 28, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 0
+ %p1 = getelementptr inbounds i8, ptr %p, i64 28
+ call void @llvm.memset.p0.i64(ptr align 1 %p0, i8 0, i64 32, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_hi_28_to_24(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_hi_28_to_24(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 21
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 0
+ %p1 = getelementptr inbounds i8, ptr %p, i64 21
+ call void @llvm.memset.p0.i64(ptr align 8 %p0, i8 0, i64 28, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_hi_31_to_28(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_hi_31_to_28(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 27
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[P0]], i8 0, i64 28, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 0
+ %p1 = getelementptr inbounds i8, ptr %p, i64 27
+ call void @llvm.memset.p0.i64(ptr align 2 %p0, i8 0, i64 31, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+
+define void @memset_optimize_size_hi_33_to_x86_32_generic_28(ptr %p) {
+; CHECK-LABEL: @memset_optimize_size_hi_33_to_x86_32_generic_28(
+; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 27
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P0]], i8 0, i64 28, i1 false)
+; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-NEXT: ret void
+;
+ %p0 = getelementptr inbounds i8, ptr %p, i64 0
+ %p1 = getelementptr inbounds i8, ptr %p, i64 27
+ call void @llvm.memset.p0.i64(ptr align 4 %p0, i8 0, i64 33, i1 false)
+ store i64 0, ptr %p1, align 1
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-MEM16: {{.*}}
+; CHECK-MEM4: {{.*}}
>From b555464f8a9da42f4c31cc27941a01a33cee0d1e Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 28 Aug 2024 10:37:46 -0700
Subject: [PATCH 3/3] [DSE] Optimizing shrinking of memory intrinsic
Currently for the following snippet:
`memcpy(dst, src, 8); dst[7] = 0`
DSE will transform it to:
`memcpy(dst, src, 7); dst[7] = 0`
Likewise if we have:
`memcpy(dst, src, 9); dst[7] = 0; dst[8] = 0`
DSE will transform it to:
`memcpy(dst, src, 7); dst[7] = 0`
However, in both cases we would prefer to emit an 8-byte `memcpy`
followed by any overwrite of the trailing byte(s).
This patch attempts to optimize the new intrinsic length within the
available range of the original size and the maximally shrunk size.
---
.../Scalar/DeadStoreElimination.cpp | 141 ++++++++--
.../dse/dse-after-memcpyopt-merge.ll | 11 +-
.../OverwriteStoreBegin.ll | 177 ++++++++-----
.../DeadStoreElimination/OverwriteStoreEnd.ll | 242 ++++++++++++------
.../stores-of-existing-values.ll | 4 +-
5 files changed, 411 insertions(+), 164 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a37f295abbd31c..3c83adb7ebabee 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -48,6 +48,7 @@
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -558,9 +559,10 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
for_each(LinkedDVRAssigns, InsertAssignForOverlap);
}
-static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
- uint64_t &DeadSize, int64_t KillingStart,
- uint64_t KillingSize, bool IsOverwriteEnd) {
+static bool tryToShorten(Instruction *DeadI, int64_t DeadStart,
+ uint64_t DeadSize, int64_t KillingStart,
+ uint64_t KillingSize, bool IsOverwriteEnd,
+ const TargetTransformInfo &TTI) {
auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
@@ -583,11 +585,7 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
// Compute start and size of the region to remove. Make sure 'PrefAlign' is
// maintained on the remaining store.
if (IsOverwriteEnd) {
- // Calculate required adjustment for 'KillingStart' in order to keep
- // remaining store size aligned on 'PerfAlign'.
- uint64_t Off =
- offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
- ToRemoveStart = KillingStart + Off;
+ ToRemoveStart = KillingStart;
if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
return false;
ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
@@ -612,6 +610,108 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
uint64_t NewSize = DeadSize - ToRemoveSize;
+
+ // Try to coerce the new memcpy/memset size to a "fast" value. This typically
+ // means some exact multiple of the register width of the loads/stores.
+
+ // If scalar size >= vec size, assume target will use scalars for implementing
+ // memset/memcpy.
+ TypeSize ScalarSize =
+ TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar);
+ TypeSize VecSize =
+ TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
+ uint64_t MemUnit = 0;
+ if (ScalarSize >= VecSize)
+ MemUnit = ScalarSize.getFixedValue();
+ // Otherwise assume memset/memcpy will be lowered with Vec's
+ else
+ MemUnit =
+ TTI.getLoadStoreVecRegBitWidth(DeadIntrinsic->getDestAddressSpace());
+
+ MemUnit /= 8U;
+
+ // Assume loads/stores are issued by power of 2 regions. Try to minimize
+ // number of power of 2 blocks.
+ // ie if we have DeadSize = 15
+  // NewSize = 7 -> 8 (4 + 2 + 1) -> (8)
+ // NewSize = 9 -> 9 (8 + 1) == (8 + 1)
+ // NewSize = 11 -> 12 (8 + 2 + 1) -> (8 + 4)
+ uint64_t Upper = DeadSize;
+ uint64_t Lower = NewSize;
+
+ uint64_t RoundLower = MemUnit * (Lower / MemUnit);
+
+ // We have some trailing loads/stores we can try to optimize.
+ if (RoundLower != Lower && Lower != 0 && (RoundLower + MemUnit) != 0) {
+ Upper = std::min(Upper, RoundLower + MemUnit - 1);
+ // Don't bust inlining doing this.
+ uint64_t InlineThresh = TTI.getMaxMemIntrinsicInlineSizeThreshold();
+ if (Upper > InlineThresh && Lower <= InlineThresh)
+ Upper = InlineThresh;
+
+ // Replace Lower with value in range [Lower, Upper] that has min popcount
+ // (selecting for minimum value as tiebreaker when popcount is the same).
+ // The idea here is this will require the minimum number of load/stores and
+ // within that will use the presumably preferable minimum width.
+
+ // Get highest bit that differs between Lower and Upper. Anything above this
+  // bit must be in the new value. Anything below it that's larger than Lower
+ // is fair game.
+ uint64_t Dif = (Lower - 1) ^ Upper;
+ uint64_t HighestBit = 63 - llvm::countl_zero(Dif);
+
+  // Make Lo/Hi masks from the highest differing bit. The Lo mask is used to
+  // find a value we can round up to the minimum power-of-2 chunk; the Hi mask
+  // portion is preserved.
+ uint64_t HighestP2 = static_cast<uint64_t>(1) << HighestBit;
+ uint64_t LoMask = HighestP2 - 1;
+ uint64_t HiMask = -HighestP2;
+
+ // Minimum power of 2 for the "tail"
+ uint64_t LoVal = Lower & LoMask;
+ if (LoVal)
+ LoVal = llvm::bit_ceil(LoVal);
+ // Preserved high bits to stay in range.
+ uint64_t HiVal = Lower & HiMask;
+ Lower = LoVal | HiVal;
+
+ // If we have more than two tail stores see if we can just roundup the next
+ // memunit.
+ if (llvm::popcount(Lower % MemUnit) > 1 &&
+ DeadSize >= (RoundLower + MemUnit))
+ Lower = RoundLower + MemUnit;
+
+ uint64_t OptimizedNewSize = NewSize;
+    // If we are over-writing the beginning, make sure we don't mess up the
+ // alignment.
+ if (IsOverwriteEnd || isAligned(PrefAlign, DeadSize - Lower)) {
+ OptimizedNewSize = Lower;
+ } else {
+ // Our minimal value isn't properly aligned, see if we can
+ // increase the size of a tail loads/stores.
+ Lower = HiVal | HighestP2;
+ if (isAligned(PrefAlign, DeadSize - Lower))
+ OptimizedNewSize = Lower;
+ // If we can't adjust size without messing up alignment, see if the new
+ // size is actually preferable.
+ // TODO: We should probably do better here than just giving up.
+ else if ((NewSize <= InlineThresh) == (DeadSize <= InlineThresh) &&
+ llvm::popcount(NewSize) > llvm::popcount(DeadSize) &&
+ DeadSize / MemUnit == NewSize / MemUnit)
+ return false;
+ }
+
+ // Adjust new starting point for the memset/memcpy.
+ if (OptimizedNewSize != NewSize) {
+ if (!IsOverwriteEnd)
+ ToRemoveSize = DeadSize - OptimizedNewSize;
+ NewSize = OptimizedNewSize;
+ }
+
+ // Our optimal length is the original length, skip the transform.
+ if (NewSize == DeadSize)
+ return false;
+ }
+
if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
// When shortening an atomic memory intrinsic, the newly shortened
// length must remain an integer multiple of the element size.
@@ -654,7 +754,8 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
}
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
- int64_t &DeadStart, uint64_t &DeadSize) {
+ int64_t &DeadStart, uint64_t &DeadSize,
+ const TargetTransformInfo &TTI) {
if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
return false;
@@ -672,7 +773,7 @@ static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
// be non negative due to preceding checks.
KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
- true)) {
+ true, TTI)) {
IntervalMap.erase(OII);
return true;
}
@@ -682,7 +783,8 @@ static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
static bool tryToShortenBegin(Instruction *DeadI,
OverlapIntervalsTy &IntervalMap,
- int64_t &DeadStart, uint64_t &DeadSize) {
+ int64_t &DeadStart, uint64_t &DeadSize,
+ const TargetTransformInfo &TTI) {
if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
return false;
@@ -701,7 +803,7 @@ static bool tryToShortenBegin(Instruction *DeadI,
assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
"Should have been handled as OW_Complete");
if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
- false)) {
+ false, TTI)) {
IntervalMap.erase(OII);
return true;
}
@@ -852,6 +954,7 @@ struct DSEState {
DominatorTree &DT;
PostDominatorTree &PDT;
const TargetLibraryInfo &TLI;
+ const TargetTransformInfo &TTI;
const DataLayout &DL;
const LoopInfo &LI;
@@ -896,9 +999,9 @@ struct DSEState {
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
- const LoopInfo &LI)
+ const TargetTransformInfo &TTI, const LoopInfo &LI)
: F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
- PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
+ PDT(PDT), TLI(TLI), TTI(TTI), DL(F.getDataLayout()), LI(LI) {
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
@@ -2103,10 +2206,10 @@ struct DSEState {
uint64_t DeadSize = Loc.Size.getValue();
GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
OverlapIntervalsTy &IntervalMap = OI.second;
- Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
+ Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
if (IntervalMap.empty())
continue;
- Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+ Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
}
return Changed;
}
@@ -2347,9 +2450,10 @@ bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
const TargetLibraryInfo &TLI,
+ const TargetTransformInfo &TTI,
const LoopInfo &LI) {
bool MadeChange = false;
- DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+ DSEState State(F, AA, MSSA, DT, PDT, TLI, TTI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
@@ -2383,12 +2487,13 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
AliasAnalysis &AA = AM.getResult<AAManager>(F);
const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, TTI, LI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
index 74ac4d763e5e5b..64a75a61a038fe 100644
--- a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
+++ b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll
@@ -37,13 +37,11 @@ define dso_local void @_Z1fv() local_unnamed_addr !dbg !7 {
; CHECK-NEXT: #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), [[META39:![0-9]+]], ptr poison, !DIExpression(), [[META25]])
; CHECK-NEXT: [[TMP0:%.*]] = bitcast ptr [[ARRAYIDX5_I]] to ptr, !dbg [[DBG40:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4, !dbg [[DBG41:![0-9]+]]
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 8, i1 false), !dbg [[DBG41]], !DIAssignID [[META34]]
-; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[META25]]
-; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX7]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID42:![0-9]+]]
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 12, i1 false), !dbg [[DBG41]], !DIAssignID [[META34]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 0, !dbg [[META25]]
-; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID43:![0-9]+]]
+; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID42:![0-9]+]]
; CHECK-NEXT: call void @_Z3escP1v(ptr nonnull [[G]]), !dbg [[DBG40]]
-; CHECK-NEXT: ret void, !dbg [[DBG44:![0-9]+]]
+; CHECK-NEXT: ret void, !dbg [[DBG43:![0-9]+]]
;
entry:
%g = alloca %struct.v, align 4, !DIAssignID !23
@@ -179,6 +177,5 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
; CHECK: [[DBG40]] = !DILocation(line: 14, column: 3, scope: [[DBG8]])
; CHECK: [[DBG41]] = !DILocation(line: 5, column: 17, scope: [[META27]], inlinedAt: [[META33]])
; CHECK: [[DIASSIGNID42]] = distinct !DIAssignID()
-; CHECK: [[DIASSIGNID43]] = distinct !DIAssignID()
-; CHECK: [[DBG44]] = !DILocation(line: 15, column: 1, scope: [[DBG8]])
+; CHECK: [[DBG43]] = !DILocation(line: 15, column: 1, scope: [[DBG8]])
;.
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
index 6a257792db833c..135b4a18341e92 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
@@ -234,14 +234,22 @@ entry:
}
define void @write2to10(ptr nocapture %p) {
-; CHECK-LABEL: @write2to10(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
-; CHECK-NEXT: store i64 1, ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @write2to10(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
+; CHECK-MEM4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; CHECK-MEM4-NEXT: store i64 1, ptr [[ARRAYIDX2]], align 8
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @write2to10(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ARRAYIDX0]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; CHECK-MEM16-NEXT: store i64 1, ptr [[ARRAYIDX2]], align 8
+; CHECK-MEM16-NEXT: ret void
;
entry:
%arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
@@ -252,14 +260,22 @@ entry:
}
define void @write2to10_atomic(ptr nocapture %p) {
-; CHECK-LABEL: @write2to10_atomic(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i32 4)
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
-; CHECK-NEXT: store atomic i64 1, ptr [[ARRAYIDX2]] unordered, align 8
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @write2to10_atomic(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
+; CHECK-MEM4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
+; CHECK-MEM4-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i32 4)
+; CHECK-MEM4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; CHECK-MEM4-NEXT: store atomic i64 1, ptr [[ARRAYIDX2]] unordered, align 8
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @write2to10_atomic(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
+; CHECK-MEM16-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[ARRAYIDX0]], i8 0, i64 32, i32 4)
+; CHECK-MEM16-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; CHECK-MEM16-NEXT: store atomic i64 1, ptr [[ARRAYIDX2]] unordered, align 8
+; CHECK-MEM16-NEXT: ret void
;
entry:
%arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
@@ -367,13 +383,20 @@ declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture, i8, i64, i1) nounwi
declare void @llvm.memset.element.unordered.atomic.p0.i64(ptr nocapture, i8, i64, i32) nounwind
define void @ow_begin_align1(ptr nocapture %p) {
-; CHECK-LABEL: @ow_begin_align1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 7
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP0]], i8 0, i64 25, i1 false)
-; CHECK-NEXT: store i64 1, ptr [[P]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @ow_begin_align1(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-MEM4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 7
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP0]], i8 0, i64 25, i1 false)
+; CHECK-MEM4-NEXT: store i64 1, ptr [[P]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @ow_begin_align1(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: store i64 1, ptr [[P]], align 1
+; CHECK-MEM16-NEXT: ret void
;
entry:
%p1 = getelementptr inbounds i8, ptr %p, i64 1
@@ -383,13 +406,20 @@ entry:
}
define void @ow_end_align4(ptr nocapture %p) {
-; CHECK-LABEL: @ow_end_align4(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 4
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
-; CHECK-NEXT: store i64 1, ptr [[P]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @ow_end_align4(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-MEM4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 4
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: store i64 1, ptr [[P]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @ow_end_align4(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: store i64 1, ptr [[P]], align 1
+; CHECK-MEM16-NEXT: ret void
;
entry:
%p1 = getelementptr inbounds i8, ptr %p, i64 1
@@ -414,13 +444,21 @@ entry:
}
define void @memset_optimize_size_lo_33_to_x86_32_generic_28(ptr %p) {
-; CHECK-LABEL: @memset_optimize_size_lo_33_to_x86_32_generic_28(
-; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 3
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 5
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 28, i1 false)
-; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @memset_optimize_size_lo_33_to_x86_32_generic_28(
+; CHECK-MEM4-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 3
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-MEM4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 5
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @memset_optimize_size_lo_33_to_x86_32_generic_28(
+; CHECK-MEM16-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 3
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-MEM16-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 1
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM16-NEXT: ret void
;
%p0 = getelementptr inbounds i8, ptr %p, i64 3
%p1 = getelementptr inbounds i8, ptr %p, i64 0
@@ -446,13 +484,20 @@ define void @memset_optimize_size_lo_33_misaligned_x86_fail_generic_save_unit(pt
}
define void @memset_optimize_size_lo_32_x86_misaligned_fail_generic_save_unit2(ptr %p) {
-; CHECK-LABEL: @memset_optimize_size_lo_32_x86_misaligned_fail_generic_save_unit2(
-; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 28, i1 false)
-; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @memset_optimize_size_lo_32_x86_misaligned_fail_generic_save_unit2(
+; CHECK-MEM4-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-MEM4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @memset_optimize_size_lo_32_x86_misaligned_fail_generic_save_unit2(
+; CHECK-MEM16-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[P0]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM16-NEXT: ret void
;
%p0 = getelementptr inbounds i8, ptr %p, i64 4
%p1 = getelementptr inbounds i8, ptr %p, i64 0
@@ -462,13 +507,21 @@ define void @memset_optimize_size_lo_32_x86_misaligned_fail_generic_save_unit2(p
}
define void @memset_optimize_size_lo_34_to_32(ptr %p) {
-; CHECK-LABEL: @memset_optimize_size_lo_34_to_32(
-; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 30, i1 false)
-; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @memset_optimize_size_lo_34_to_32(
+; CHECK-MEM4-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-MEM4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 30, i1 false)
+; CHECK-MEM4-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @memset_optimize_size_lo_34_to_32(
+; CHECK-MEM16-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-MEM16-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 2
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[TMP1]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM16-NEXT: ret void
;
%p0 = getelementptr inbounds i8, ptr %p, i64 4
%p1 = getelementptr inbounds i8, ptr %p, i64 0
@@ -494,13 +547,21 @@ define void @memset_optimize_size_lo_34_x86_misaligned_fail_generic_save_unit(pt
}
define void @memset_optimize_size_lo_34_to_32_no_align_okay(ptr %p) {
-; CHECK-LABEL: @memset_optimize_size_lo_34_to_32_no_align_okay(
-; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 30, i1 false)
-; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @memset_optimize_size_lo_34_to_32_no_align_okay(
+; CHECK-MEM4-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-MEM4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 4
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 30, i1 false)
+; CHECK-MEM4-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @memset_optimize_size_lo_34_to_32_no_align_okay(
+; CHECK-MEM16-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 0
+; CHECK-MEM16-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P0]], i64 2
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM16-NEXT: ret void
;
%p0 = getelementptr inbounds i8, ptr %p, i64 4
%p1 = getelementptr inbounds i8, ptr %p, i64 0
diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
index 9fc3f827e614db..4ad84f213c08d2 100644
--- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -68,12 +68,19 @@ entry:
}
define void @write28to32(ptr nocapture %p) nounwind uwtable ssp {
-; CHECK-LABEL: @write28to32(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 28, i1 false)
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
-; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @write28to32(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
+; CHECK-MEM4-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @write28to32(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
+; CHECK-MEM16-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
+; CHECK-MEM16-NEXT: ret void
;
entry:
call void @llvm.memset.p0.i64(ptr align 4 %p, i8 0, i64 32, i1 false)
@@ -83,12 +90,19 @@ entry:
}
define void @write28to32_atomic(ptr nocapture %p) nounwind uwtable ssp {
-; CHECK-LABEL: @write28to32_atomic(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 28, i32 4)
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
-; CHECK-NEXT: store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @write28to32_atomic(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 28, i32 4)
+; CHECK-MEM4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
+; CHECK-MEM4-NEXT: store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @write28to32_atomic(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i32 4)
+; CHECK-MEM16-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
+; CHECK-MEM16-NEXT: store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
+; CHECK-MEM16-NEXT: ret void
;
entry:
call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %p, i8 0, i64 32, i32 4)
@@ -98,12 +112,19 @@ entry:
}
define void @dontwrite28to32memset(ptr nocapture %p) nounwind uwtable ssp {
-; CHECK-LABEL: @dontwrite28to32memset(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P:%.*]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
-; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @dontwrite28to32memset(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P:%.*]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
+; CHECK-MEM4-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @dontwrite28to32memset(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P:%.*]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
+; CHECK-MEM16-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
+; CHECK-MEM16-NEXT: ret void
;
entry:
call void @llvm.memset.p0.i64(ptr align 16 %p, i8 0, i64 32, i1 false)
@@ -113,12 +134,19 @@ entry:
}
define void @dontwrite28to32memset_atomic(ptr nocapture %p) nounwind uwtable ssp {
-; CHECK-LABEL: @dontwrite28to32memset_atomic(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 [[P:%.*]], i8 0, i64 32, i32 4)
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
-; CHECK-NEXT: store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @dontwrite28to32memset_atomic(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 [[P:%.*]], i8 0, i64 28, i32 4)
+; CHECK-MEM4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
+; CHECK-MEM4-NEXT: store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @dontwrite28to32memset_atomic(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 [[P:%.*]], i8 0, i64 32, i32 4)
+; CHECK-MEM16-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
+; CHECK-MEM16-NEXT: store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
+; CHECK-MEM16-NEXT: ret void
;
entry:
call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 16 %p, i8 0, i64 32, i32 4)
@@ -204,12 +232,19 @@ entry:
}
define void @dontwrite28to32memcpy(ptr nocapture %p) nounwind uwtable ssp {
-; CHECK-LABEL: @dontwrite28to32memcpy(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob1, i64 32, i1 false)
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], ptr [[P]], i64 0, i32 0, i64 7
-; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @dontwrite28to32memcpy(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob1, i64 28, i1 false)
+; CHECK-MEM4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], ptr [[P]], i64 0, i32 0, i64 7
+; CHECK-MEM4-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @dontwrite28to32memcpy(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob1, i64 32, i1 false)
+; CHECK-MEM16-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], ptr [[P]], i64 0, i32 0, i64 7
+; CHECK-MEM16-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4
+; CHECK-MEM16-NEXT: ret void
;
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr align 16 %p, ptr align 16 @glob1, i64 32, i1 false)
@@ -219,12 +254,19 @@ entry:
}
define void @dontwrite28to32memcpy_atomic(ptr nocapture %p) nounwind uwtable ssp {
-; CHECK-LABEL: @dontwrite28to32memcpy_atomic(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob1, i64 32, i32 4)
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], ptr [[P]], i64 0, i32 0, i64 7
-; CHECK-NEXT: store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @dontwrite28to32memcpy_atomic(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob1, i64 28, i32 4)
+; CHECK-MEM4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], ptr [[P]], i64 0, i32 0, i64 7
+; CHECK-MEM4-NEXT: store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @dontwrite28to32memcpy_atomic(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 [[P:%.*]], ptr align 16 @glob1, i64 32, i32 4)
+; CHECK-MEM16-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_VEC2:%.*]], ptr [[P]], i64 0, i32 0, i64 7
+; CHECK-MEM16-NEXT: store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
+; CHECK-MEM16-NEXT: ret void
;
entry:
tail call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 %p, ptr align 16 @glob1, i64 32, i32 4)
@@ -351,13 +393,21 @@ entry:
}
define void @ow_end_align1(ptr nocapture %p) {
-; CHECK-LABEL: @ow_end_align1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 27, i1 false)
-; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
-; CHECK-NEXT: store i64 1, ptr [[P2]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @ow_end_align1(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
+; CHECK-MEM4-NEXT: store i64 1, ptr [[P2]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @ow_end_align1(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
+; CHECK-MEM16-NEXT: store i64 1, ptr [[P2]], align 1
+; CHECK-MEM16-NEXT: ret void
;
entry:
%p1 = getelementptr inbounds i8, ptr %p, i64 1
@@ -368,13 +418,21 @@ entry:
}
define void @ow_end_align4(ptr nocapture %p) {
-; CHECK-LABEL: @ow_end_align4(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 28, i1 false)
-; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
-; CHECK-NEXT: store i64 1, ptr [[P2]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @ow_end_align4(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
+; CHECK-MEM4-NEXT: store i64 1, ptr [[P2]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @ow_end_align4(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
+; CHECK-MEM16-NEXT: store i64 1, ptr [[P2]], align 1
+; CHECK-MEM16-NEXT: ret void
;
entry:
%p1 = getelementptr inbounds i8, ptr %p, i64 1
@@ -385,13 +443,21 @@ entry:
}
define void @ow_end_align8(ptr nocapture %p) {
-; CHECK-LABEL: @ow_end_align8(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P1]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
-; CHECK-NEXT: store i64 1, ptr [[P2]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @ow_end_align8(
+; CHECK-MEM4-NEXT: entry:
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P1]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
+; CHECK-MEM4-NEXT: store i64 1, ptr [[P2]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @ow_end_align8(
+; CHECK-MEM16-NEXT: entry:
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P1]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
+; CHECK-MEM16-NEXT: store i64 1, ptr [[P2]], align 1
+; CHECK-MEM16-NEXT: ret void
;
entry:
%p1 = getelementptr inbounds i8, ptr %p, i64 1
@@ -405,7 +471,7 @@ define void @memset_optimize_size_hi_31_to_24(ptr %p) {
; CHECK-LABEL: @memset_optimize_size_hi_31_to_24(
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 23
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P0]], i8 0, i64 23, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P0]], i8 0, i64 24, i1 false)
; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
; CHECK-NEXT: ret void
;
@@ -417,12 +483,19 @@ define void @memset_optimize_size_hi_31_to_24(ptr %p) {
}
define void @memset_optimize_size_hi_32_no_change_x86_change_generic(ptr %p) {
-; CHECK-LABEL: @memset_optimize_size_hi_32_no_change_x86_change_generic(
-; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 28
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P0]], i8 0, i64 28, i1 false)
-; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @memset_optimize_size_hi_32_no_change_x86_change_generic(
+; CHECK-MEM4-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 28
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P0]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @memset_optimize_size_hi_32_no_change_x86_change_generic(
+; CHECK-MEM16-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 28
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[P0]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM16-NEXT: ret void
;
%p0 = getelementptr inbounds i8, ptr %p, i64 0
%p1 = getelementptr inbounds i8, ptr %p, i64 28
@@ -432,12 +505,19 @@ define void @memset_optimize_size_hi_32_no_change_x86_change_generic(ptr %p) {
}
define void @memset_optimize_size_hi_28_to_24(ptr %p) {
-; CHECK-LABEL: @memset_optimize_size_hi_28_to_24(
-; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 21
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P0]], i8 0, i64 24, i1 false)
-; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @memset_optimize_size_hi_28_to_24(
+; CHECK-MEM4-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 21
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P0]], i8 0, i64 21, i1 false)
+; CHECK-MEM4-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @memset_optimize_size_hi_28_to_24(
+; CHECK-MEM16-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 21
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[P0]], i8 0, i64 24, i1 false)
+; CHECK-MEM16-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM16-NEXT: ret void
;
%p0 = getelementptr inbounds i8, ptr %p, i64 0
%p1 = getelementptr inbounds i8, ptr %p, i64 21
@@ -462,12 +542,19 @@ define void @memset_optimize_size_hi_31_to_28(ptr %p) {
}
define void @memset_optimize_size_hi_33_to_x86_32_generic_28(ptr %p) {
-; CHECK-LABEL: @memset_optimize_size_hi_33_to_x86_32_generic_28(
-; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 27
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P0]], i8 0, i64 28, i1 false)
-; CHECK-NEXT: store i64 0, ptr [[P1]], align 1
-; CHECK-NEXT: ret void
+; CHECK-MEM4-LABEL: @memset_optimize_size_hi_33_to_x86_32_generic_28(
+; CHECK-MEM4-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-MEM4-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 27
+; CHECK-MEM4-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P0]], i8 0, i64 28, i1 false)
+; CHECK-MEM4-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM4-NEXT: ret void
+;
+; CHECK-MEM16-LABEL: @memset_optimize_size_hi_33_to_x86_32_generic_28(
+; CHECK-MEM16-NEXT: [[P0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 0
+; CHECK-MEM16-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 27
+; CHECK-MEM16-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[P0]], i8 0, i64 32, i1 false)
+; CHECK-MEM16-NEXT: store i64 0, ptr [[P1]], align 1
+; CHECK-MEM16-NEXT: ret void
;
%p0 = getelementptr inbounds i8, ptr %p, i64 0
%p1 = getelementptr inbounds i8, ptr %p, i64 27
@@ -475,6 +562,3 @@ define void @memset_optimize_size_hi_33_to_x86_32_generic_28(ptr %p) {
store i64 0, ptr %p1, align 1
ret void
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-MEM16: {{.*}}
-; CHECK-MEM4: {{.*}}
diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
index c9a0943de8cd98..2d04179eeb6e0f 100644
--- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll
@@ -549,8 +549,8 @@ define void @test12_memset_later_store_exceeds_memset(ptr %ptr) {
define void @test12_memset_later_store_before_memset(ptr %ptr) {
; CHECK-LABEL: @test12_memset_later_store_before_memset(
; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 1
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR_1]], i64 7
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 3, i1 false)
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR_1]], i64 6
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP1]], i8 0, i64 4, i1 false)
; CHECK-NEXT: store i64 0, ptr [[PTR]], align 8
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list