[llvm] e0911b9 - [MemCpyOpt] precommit test for D155406 (NFC)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 24 06:19:16 PDT 2023


Author: khei4
Date: 2023-08-24T22:19:01+09:00
New Revision: e0911b98d19e5766069cdf0f5bd1d0f6e3267a49

URL: https://github.com/llvm/llvm-project/commit/e0911b98d19e5766069cdf0f5bd1d0f6e3267a49
DIFF: https://github.com/llvm/llvm-project/commit/e0911b98d19e5766069cdf0f5bd1d0f6e3267a49.diff

LOG: [MemCpyOpt] precommit test for D155406 (NFC)
Differential Revision: https://reviews.llvm.org/D155422

Added: 
    

Modified: 
    llvm/test/Transforms/MemCpyOpt/stack-move.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 00c527f6f62ddf..966f6a6eb1ef00 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -7,6 +7,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias
 declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
 declare void @llvm.memcpy.p2.p1.i64(ptr addrspace(2) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg)
 declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 
 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
@@ -444,8 +445,7 @@ bb1:
   ret void
 }
 
-
-; TODO: merge allocas for bb-separated, but logically straight
+; TODO: Merge alloca
 define void @multi_bb_load_store(i1 %b) {
 ; CHECK-LABEL: define void @multi_bb_load_store
 ; CHECK-SAME: (i1 [[B:%.*]]) {
@@ -456,6 +456,44 @@ define void @multi_bb_load_store(i1 %b) {
 ; CHECK-NEXT:    store i32 42, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4
+; CHECK-NEXT:    store i32 [[SRC_VAL]], ptr [[DEST]], align 4
+; CHECK-NEXT:    br label [[BB0:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[DEST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca i32, align 4
+  %dest = alloca i32, align 4
+  call void @llvm.lifetime.start.p0(i64 4, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nocapture %dest)
+  store i32 42, ptr %src
+  %1 = call i32 @use_nocapture(ptr nocapture %src)
+
+  %src.val = load i32, ptr %src
+  store i32 %src.val, ptr %dest
+  br label %bb0
+
+bb0:
+  %2 = call i32 @use_nocapture(ptr nocapture %dest)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nocapture %dest)
+  ret void
+}
+
+; FIXME: merge allocas for bb-separated, but logically straight.
+; We might be handle those load/store MemCpyOpt totally
+define void @multi_bb_separated_load_store(i1 %b) {
+; CHECK-LABEL: define void @multi_bb_separated_load_store
+; CHECK-SAME: (i1 [[B:%.*]]) {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store i32 42, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
+; CHECK-NEXT:    [[SRC_VAL:%.*]] = load i32, ptr [[SRC]], align 4
 ; CHECK-NEXT:    br label [[BB0:%.*]]
 ; CHECK:       bb0:
 ; CHECK-NEXT:    store i32 [[SRC_VAL]], ptr [[DEST]], align 4
@@ -534,8 +572,94 @@ bb2:
   ret void
 }
 
-define ptr @multi_bb_loop(i32 %n) {
-; CHECK-LABEL: define ptr @multi_bb_loop
+; Test for BasicBlock and Instruction mixed post-dominator finding.
+define void @multi_bb_pdom_test0(i1 %b) {
+; CHECK-LABEL: define void @multi_bb_pdom_test0
+; CHECK-SAME: (i1 [[B:%.*]]) {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT:    br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dest = alloca %struct.Foo, align 4
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false); 1
+  br i1 %b, label %bb0, label %bb1
+
+bb0:
+  %1 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  br label %bb2
+
+bb1:
+  %2 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  br label %bb2
+
+bb2:
+  %3 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+
+  uselistorder ptr %dest, {  2, 3, 0, 1, 4, 5 }
+}
+
+; Test for inserting lifetime.end after the phi-node
+define void @multi_bb_pdom_test1(i1 %b) {
+; CHECK-LABEL: define void @multi_bb_pdom_test1
+; CHECK-SAME: (i1 [[B:%.*]]) {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT:    br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 42, [[BB0]] ], [ 41, [[BB1]] ]
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dest = alloca %struct.Foo, align 4
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false); 1
+  br i1 %b, label %bb0, label %bb1
+
+bb0:
+  %1 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  br label %bb2
+
+bb1:
+  %2 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  br label %bb2
+
+bb2:
+  %i = phi i32 [ 42, %bb0 ], [ 41, %bb1 ]
+  ret void
+
+}
+
+; TODO: merge allocas for multi basicblock loop case.
+define void @multi_bb_loop(i32 %n) {
+; CHECK-LABEL: define void @multi_bb_loop
 ; CHECK-SAME: (i32 [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[NLT1:%.*]] = icmp slt i32 [[N]], 1
@@ -549,11 +673,11 @@ define ptr @multi_bb_loop(i32 %n) {
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[NEW_I:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DEST]], ptr align 8 [[SRC]], i64 12, i1 false)
 ; CHECK-NEXT:    [[NEW_I]] = add i32 [[I]], 1
-; CHECK-NEXT:    store i32 [[NEW_I]], ptr [[DEST]], align 4
+; CHECK-NEXT:    store i32 [[NEW_I]], ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[IGTN:%.*]] = icmp sgt i32 [[NEW_I]], [[N]]
 ; CHECK-NEXT:    br i1 [[IGTN]], label [[LOOP_EXIT]], label [[LOOP_BODY]]
 ; CHECK:       loop_exit:
-; CHECK-NEXT:    ret ptr [[DEST]]
+; CHECK-NEXT:    ret void
 ;
 entry:
   %nlt1 = icmp slt i32 %n, 1
@@ -568,16 +692,266 @@ loop_body:
   %i = phi i32 [ %new_i, %loop_body ], [ 1, %entry ]
   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 12, i1 false)
   %new_i = add i32 %i, 1
-  store i32 %new_i, ptr %dest
+  store i32 %new_i, ptr %src
   %igtn = icmp sgt i32 %new_i, %n
   br i1 %igtn, label %loop_exit, label %loop_body
 
 loop_exit:
-  ret ptr %dest
+  ret void
+}
+
+; TODO: merge allocas for multi basicblocks, s.t. some modref which is unreachable from copy exists.
+define void @multi_bb_unreachable_modref(i1 %b0) {
+; CHECK-LABEL: define void @multi_bb_unreachable_modref
+; CHECK-SAME: (i1 [[B0:%.*]]) {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    br i1 [[B0]], label [[BB0:%.*]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    ret void
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dest = alloca %struct.Foo, align 4
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  br i1 %b0, label %bb0, label %exit
+
+exit:
+  %2 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  ret void
+
+bb0:
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+}
+
+; TODO: merge allocas for multi basicblocks, s.t. memcpy doesn't dominate the uses.
+define void @multi_bb_non_dominated(i1 %b0, i1 %b1) {
+; CHECK-LABEL: define void @multi_bb_non_dominated
+; CHECK-SAME: (i1 [[B0:%.*]], i1 [[B1:%.*]]) {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    br i1 [[B0]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dest = alloca %struct.Foo, align 4
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  br i1 %b0, label %bb0, label %bb1
+
+bb0:
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+  br label %bb2
+
+bb1:
+  %2 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  ; %3 = call i32 @use_writeonly(ptr noundef nocapture %dest)
+  br label %bb2
+
+bb2:
+  ; %4 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+}
+
+; TODO: to merge following `is_def` cases, we need to do liveness analysis
+; or something that distinguish the full-size-Mod as a Def.
+; Tests that a memcpy that completely overwrites a stack value is a definition
+; for the purposes of liveness analysis.
+define void @memcpy_is_def() {
+; CHECK-LABEL: define void @memcpy_is_def() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[SRC]], ptr align 4 [[DEST]], i64 12, i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dest = alloca %struct.Foo, align 4
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+  %2 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %src, ptr align 4 %dest, i64 12, i1 false)
+  %3 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+}
+
+; TODO: merge allocas
+; Tests that a memset that completely overwrites a stack value is a definition
+; for the purposes of liveness analysis.
+define void @memset_is_def() {
+; CHECK-LABEL: define void @memset_is_def() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[SRC]], i8 42, i64 12, i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dest = alloca %struct.Foo, align 4
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+  %2 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  call void @llvm.memset.p0.i64(ptr align 4 %src, i8 42, i64 12, i1 false)
+  %3 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+}
+
+; TODO: merge allocas
+; Tests that a store that completely overwrites a stack value is a definition
+; for the purposes of liveness analysis.
+define void @store_is_def() {
+; CHECK-LABEL: define void @store_is_def() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store i32 42, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SRC]], align 4
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[DEST]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    store i32 64, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nocapture [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca i32, align 4
+  %dest = alloca i32, align 4
+  call void @llvm.lifetime.start.p0(i64 4, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nocapture %dest)
+  store i32 42, ptr %src
+  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  %2 = load i32, ptr %src
+  store i32 %2, ptr %dest
+  %3 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  store i32 64, ptr %src
+  %4 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nocapture %dest)
+  ret void
 }
 
 ; Optimization failures follow:
 
+; Tests that a memcpy that doesn't completely overwrite a stack value is a use
+; for the purposes of liveness analysis, not a definition.
+define void @incomplete_memcpy() {
+; CHECK-LABEL: define void @incomplete_memcpy() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 11, i1 false)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dest = alloca %struct.Foo, align 4
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 11, i1 false)
+  %2 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+}
+
+; Tests that a store that doesn't completely overwrite a stack value is a use
+; for the purposes of liveness analysis, not a definition.
+define void @incomplete_store() {
+; CHECK-LABEL: define void @incomplete_store() {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SRC]], align 4
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[DEST]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dest = alloca %struct.Foo, align 4
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  %2 = load i32, ptr %src
+  store i32 %2, ptr %dest
+  %3 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+}
+
 ; Tests that dynamically-sized allocas are never merged.
 define void @dynamically_sized_alloca(i64 %i) {
 ; CHECK-LABEL: define void @dynamically_sized_alloca
@@ -1050,6 +1424,96 @@ define void @alias_src_ref_dest_mod_after_copy() {
   ret void
 }
 
+; Tests that the optimization isn't performed when the source and destination
+; have mod ref conflict.
+define void @multi_bb_mod_ref_conflict(i1 %b) {
+; CHECK-LABEL: define void @multi_bb_mod_ref_conflict
+; CHECK-SAME: (i1 [[B:%.*]]) {
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT:    br i1 [[B]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[SRC]])
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @use_nocapture(ptr nocapture noundef [[DEST]])
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    ret void
+;
+  %src = alloca %struct.Foo, align 4
+  %dest = alloca %struct.Foo, align 4
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 10, i32 20, i32 30 }, ptr %src
+  %1 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src, i64 12, i1 false)
+  br i1 %b, label %bb0, label %bb1
+
+bb0:
+  %2 = call i32 @use_nocapture(ptr noundef nocapture %src)
+  br label %bb2
+
+bb1:
+  %3 = call i32 @use_nocapture(ptr noundef nocapture %dest)
+  br label %bb2
+
+bb2:
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.end.p0(i64 12, ptr nocapture %dest)
+  ret void
+}
+
+; Tests that failure because after copy mod could be before copy on loop.
+define void @multi_bb_loop_dest_mod_before_copy(i32 %n) {
+; CHECK-LABEL: define void @multi_bb_loop_dest_mod_before_copy
+; CHECK-SAME: (i32 [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NLT1:%.*]] = icmp slt i32 [[N]], 1
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; CHECK-NEXT:    [[DEST:%.*]] = alloca [[STRUCT_FOO]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[SRC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 12, ptr nocapture [[DEST]])
+; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 0, i32 1, i32 42 }, ptr [[SRC]], align 4
+; CHECK-NEXT:    br i1 [[NLT1]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]]
+; CHECK:       loop_body:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[NEW_I:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DEST]], ptr align 8 [[SRC]], i64 12, i1 false)
+; CHECK-NEXT:    [[NEW_I]] = add i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[NEW_I]], ptr [[DEST]], align 4
+; CHECK-NEXT:    [[IGTN:%.*]] = icmp sgt i32 [[NEW_I]], [[N]]
+; CHECK-NEXT:    br i1 [[IGTN]], label [[LOOP_EXIT]], label [[LOOP_BODY]]
+; CHECK:       loop_exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %nlt1 = icmp slt i32 %n, 1
+  %src = alloca %struct.Foo, align 8
+  %dest = alloca %struct.Foo, align 8
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %src)
+  call void @llvm.lifetime.start.p0(i64 12, ptr nocapture %dest)
+  store %struct.Foo { i32 0, i32 1, i32 42 }, ptr %src
+  br i1 %nlt1, label %loop_exit, label %loop_body
+
+loop_body:
+  %i = phi i32 [ %new_i, %loop_body ], [ 1, %entry ]
+  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 12, i1 false)
+  %new_i = add i32 %i, 1
+  store i32 %new_i, ptr %dest
+  %igtn = icmp sgt i32 %new_i, %n
+  br i1 %igtn, label %loop_exit, label %loop_body
+
+loop_exit:
+  ret void
+}
+
 ; Tests that failure because partial-sized lifetimes are counted as mod.
 define void @partial_lifetime() {
 ; CHECK-LABEL: define void @partial_lifetime() {


        


More information about the llvm-commits mailing list