[llvm] r219950 - Allow call-slop optzn for destinations with a suitable dereferenceable attribute

Bjorn Steinbrink bsteinbr at gmail.com
Thu Oct 16 12:43:09 PDT 2014


Author: bsteinbr
Date: Thu Oct 16 14:43:08 2014
New Revision: 219950

URL: http://llvm.org/viewvc/llvm-project?rev=219950&view=rev
Log:
Allow call-slop optzn for destinations with a suitable dereferenceable attribute

Summary:
Currently, call slot optimization requires that if the destination is an
argument, the argument has the sret attribute. This is to ensure that
the memory access won't trap. In addition to sret, we can also allow the
optimization to happen for arguments that have the new dereferenceable
attribute, which gives the same guarantee.

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D5832

Added:
    llvm/trunk/test/Transforms/MemCpyOpt/callslot_deref.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=219950&r1=219949&r2=219950&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Thu Oct 16 14:43:08 2014
@@ -634,22 +634,24 @@ bool MemCpyOpt::performCallSlotOptzn(Ins
     if (destSize < srcSize)
       return false;
   } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
-    // If the destination is an sret parameter then only accesses that are
-    // outside of the returned struct type can trap.
-    if (!A->hasStructRetAttr())
-      return false;
+    if (A->getDereferenceableBytes() < srcSize) {
+      // If the destination is an sret parameter then only accesses that are
+      // outside of the returned struct type can trap.
+      if (!A->hasStructRetAttr())
+        return false;
 
-    Type *StructTy = cast<PointerType>(A->getType())->getElementType();
-    if (!StructTy->isSized()) {
-      // The call may never return and hence the copy-instruction may never
-      // be executed, and therefore it's not safe to say "the destination
-      // has at least <cpyLen> bytes, as implied by the copy-instruction",
-      return false;
-    }
+      Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+      if (!StructTy->isSized()) {
+        // The call may never return and hence the copy-instruction may never
+        // be executed, and therefore it's not safe to say "the destination
+        // has at least <cpyLen> bytes, as implied by the copy-instruction",
+        return false;
+      }
 
-    uint64_t destSize = DL->getTypeAllocSize(StructTy);
-    if (destSize < srcSize)
-      return false;
+      uint64_t destSize = DL->getTypeAllocSize(StructTy);
+      if (destSize < srcSize)
+        return false;
+    }
   } else {
     return false;
   }

Added: llvm/trunk/test/Transforms/MemCpyOpt/callslot_deref.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/callslot_deref.ll?rev=219950&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/callslot_deref.ll (added)
+++ llvm/trunk/test/Transforms/MemCpyOpt/callslot_deref.ll Thu Oct 16 14:43:08 2014
@@ -0,0 +1,29 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; all bytes of %dst that are touch by the memset are dereferenceable
+define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
+; CHECK-LABEL: @must_remove_memcpy(
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
+  %src = alloca [4096 x i8], align 1
+  %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
+  ret void
+}
+
+; memset touch more bytes than those guaranteed to be dereferenceable
+define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) {
+; CHECK-LABEL: @must_not_remove_memcpy(
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+  %src = alloca [4096 x i8], align 1
+  %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
+  ret void
+}





More information about the llvm-commits mailing list