[llvm] c2e77c9 - [MemCpyOpt] Add additional call slot capture tests (NFC)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 5 00:34:04 PST 2022


Author: Nikita Popov
Date: 2022-01-05T09:33:04+01:00
New Revision: c2e77c91229e4db62af1879b831a4bc0a9ca3afb

URL: https://github.com/llvm/llvm-project/commit/c2e77c91229e4db62af1879b831a4bc0a9ca3afb
DIFF: https://github.com/llvm/llvm-project/commit/c2e77c91229e4db62af1879b831a4bc0a9ca3afb.diff

LOG: [MemCpyOpt] Add additional call slot capture tests (NFC)

Added: 
    

Modified: 
    llvm/test/Transforms/MemCpyOpt/capturing-func.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
index 0ac61256c5912..aacf261918cf9 100644
--- a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
+++ b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -memcpyopt -S -verify-memoryssa | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt < %s -basic-aa -globals-aa -memcpyopt -S -verify-memoryssa | FileCheck %s
 
 target datalayout = "e"
 
@@ -11,7 +11,7 @@ declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
 ; Check that the transformation isn't applied if the called function can
 ; capture the pointer argument (i.e. the nocapture attribute isn't present)
 define void @test() {
-; CHECK-LABEL: @test(
+; CHECK-LABEL: define {{[^@]+}}@test() {
 ; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
@@ -30,7 +30,7 @@ define void @test() {
 ; Same as previous test, but with a bitcasted argument.
 ; TODO: Call slot optimization should not be applied here.
 define void @test_bitcast() {
-; CHECK-LABEL: @test_bitcast(
+; CHECK-LABEL: define {{[^@]+}}@test_bitcast() {
 ; CHECK-NEXT:    [[PTR1:%.*]] = alloca [2 x i8], align 1
 ; CHECK-NEXT:    [[PTR2:%.*]] = alloca [2 x i8], align 1
 ; CHECK-NEXT:    [[PTR1_CAST:%.*]] = bitcast [2 x i8]* [[PTR1]] to i8*
@@ -53,7 +53,7 @@ define void @test_bitcast() {
 ; Lifetime of %ptr2 ends before the potential use of the capture in the second
 ; call.
 define void @test_lifetime_end() {
-; CHECK-LABEL: @test_lifetime_end(
+; CHECK-LABEL: define {{[^@]+}}@test_lifetime_end() {
 ; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 1, i8* [[PTR2]])
@@ -73,19 +73,148 @@ define void @test_lifetime_end() {
   ret void
 }
 
+; Lifetime of %ptr2 does not end, because the lifetime.end size (0) does not match the lifetime.start size (1).
+define void @test_lifetime_not_end() {
+; CHECK-LABEL: define {{[^@]+}}@test_lifetime_not_end() {
+; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 1, i8* [[PTR2]])
+; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 0, i8* [[PTR2]])
+; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
+; CHECK-NEXT:    ret void
+;
+  %ptr1 = alloca i8
+  %ptr2 = alloca i8
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %ptr2)
+  call void @foo(i8* %ptr2)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
+  call void @llvm.lifetime.end.p0i8(i64 0, i8* %ptr2)
+  call void @foo(i8* %ptr1)
+  ret void
+}
+
 ; Lifetime of %ptr2 ends before any potential use of the capture because we
 ; return from the function.
 define void @test_function_end() {
-; CHECK-LABEL: @test_function_end(
+; CHECK-LABEL: define {{[^@]+}}@test_function_end() {
+; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %ptr1 = alloca i8
+  %ptr2 = alloca i8
+  call void @foo(i8* %ptr2)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
+  ret void
+}
+
+; A potential use of the capture occurs in a later block, can't be optimized.
+define void @test_terminator() {
+; CHECK-LABEL: define {{[^@]+}}@test_terminator() {
 ; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
 ; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
 ; CHECK-NEXT:    ret void
 ;
   %ptr1 = alloca i8
   %ptr2 = alloca i8
   call void @foo(i8* %ptr2)
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
+  br label %next
+
+next:
+  call void @foo(i8* %ptr1)
+  ret void
+}
+
+; This case can be optimized, but would require a scan across multiple blocks
+; and is currently not performed.
+define void @test_terminator2() {
+; CHECK-LABEL: define {{[^@]+}}@test_terminator2() {
+; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
+; CHECK-NEXT:    br label [[NEXT:%.*]]
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+  %ptr1 = alloca i8
+  %ptr2 = alloca i8
+  call void @foo(i8* %ptr2)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
+  br label %next
+
+next:
+  ret void
+}
+
+declare void @capture(i8*)
+
+; This case should not be optimized, because dest is captured before the call.
+define void @test_dest_captured_before_alloca() {
+; CHECK-LABEL: define {{[^@]+}}@test_dest_captured_before_alloca() {
+; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @capture(i8* [[PTR1]])
+; CHECK-NEXT:    call void @foo(i8* [[PTR2]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %ptr1 = alloca i8
+  %ptr2 = alloca i8
+  call void @capture(i8* %ptr1)
+  call void @foo(i8* %ptr2) argmemonly
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
+  ret void
+}
+
+
+@g = internal global i8 0
+
+; This case should not be optimized, because @g is captured before the call
+; (being a global) and @icmp_g might depend on its identity.
+define void @test_dest_captured_before_global() {
+; CHECK-LABEL: define {{[^@]+}}@test_dest_captured_before_global() {
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @icmp_g(i8* [[PTR]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* @g, i8* [[PTR]], i32 1, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %ptr = alloca i8
+  call void @icmp_g(i8* %ptr)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* @g, i8* %ptr, i32 1, i1 false)
+  ret void
+}
+
+define void @icmp_g(i8* %p) {
+; CHECK-LABEL: define {{[^@]+}}@icmp_g
+; CHECK-SAME: (i8* [[P:%.*]]) {
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i8* [[P]], @g
+; CHECK-NEXT:    br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    store i8 1, i8* [[P]], align 1
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    store i8 2, i8* [[P]], align 1
+; CHECK-NEXT:    ret void
+;
+  %c = icmp eq i8* %p, @g
+  br i1 %c, label %if, label %else
+
+if:
+  store i8 1, i8* %p
+  ret void
+
+else:
+  store i8 2, i8* %p
   ret void
 }


        


More information about the llvm-commits mailing list