[llvm] 616f545 - [MemCpyOpt] Use dereferenceable pointer helper

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 6 09:45:18 PDT 2020


Author: Nikita Popov
Date: 2020-10-06T18:41:19+02:00
New Revision: 616f5450480214d40dd69a5f5f0f10b41bd4b3e2

URL: https://github.com/llvm/llvm-project/commit/616f5450480214d40dd69a5f5f0f10b41bd4b3e2
DIFF: https://github.com/llvm/llvm-project/commit/616f5450480214d40dd69a5f5f0f10b41bd4b3e2.diff

LOG: [MemCpyOpt] Use dereferenceable pointer helper

The call slot optimization has some home-grown code for checking
whether the destination is dereferenceable. Replace this with the
generic isDereferenceableAndAlignedPointer() helper.
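
For context, the helper is declared in llvm/Analysis/Loads.h (hence the
new include in the diff below). At the time of this commit its signature
is roughly the following; treat this as a sketch of the header, not a
normative reference:

  // Returns true if V is known to be dereferenceable for Size bytes and
  // aligned to at least Alignment, using CtxI and DT to reason about
  // facts that hold at the given program point.
  bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
                                          const APInt &Size,
                                          const DataLayout &DL,
                                          const Instruction *CtxI = nullptr,
                                          const DominatorTree *DT = nullptr);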

I'm not checking alignment here, because that is currently handled
separately, and for allocas the required alignment may be enforced by
raising the alloca's alignment. The clean way to integrate that part
would probably be for isDereferenceableAndAlignedPointer() to accept a
callback for the actual isAligned check, which would then have a chance
to use an enforced alignment instead.
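
A hypothetical shape for such a callback-based API (not part of this
patch; the overload and parameter names are invented here purely for
illustration):

  // Hypothetical overload: the caller supplies the alignment predicate,
  // so MemCpyOpt could answer "aligned" for an alloca whose alignment it
  // is allowed to raise, instead of requiring the alignment up front.
  bool isDereferenceableAndAlignedPointer(
      const Value *V, const APInt &Size, const DataLayout &DL,
      function_ref<bool(const Value *Base, Align Alignment)> IsAligned,
      const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr);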

This allows the destination to be a GEP (among other things), though
the two open TODOs may prevent it from working in practice.
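
For illustration, one possible way to address the first TODO (checking
the underlying object rather than cpyDest itself) might be the untested
sketch below, using getUnderlyingObject() from
llvm/Analysis/ValueTracking.h:

  // Sketch only: look through GEPs and casts to the underlying object,
  // so that a GEP into a local alloca still counts as an alloca for the
  // "nothing can observe cpyDest being written early" check.
  if (!isa<AllocaInst>(getUnderlyingObject(cpyDest))) {
    // ... existing same-block / visibility checks ...
  }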

Differential Revision: https://reviews.llvm.org/D88805

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
    llvm/test/Transforms/MemCpyOpt/callslot.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 49f76d37ec0d..64821c70217b 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/MemorySSA.h"
@@ -788,40 +789,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
   // Check that accessing the first srcSize bytes of dest will not cause a
   // trap.  Otherwise the transform is invalid since it might cause a trap
   // to occur earlier than it otherwise would.
-  // TODO: Use isDereferenceablePointer() API instead.
-  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
-    // The destination is an alloca.  Check it is larger than srcSize.
-    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
-    if (!destArraySize)
-      return false;
-
-    uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
-                        destArraySize->getZExtValue();
-
-    if (destSize < srcSize)
-      return false;
-  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
-    if (A->getDereferenceableBytes() < srcSize) {
-      // If the destination is an sret parameter then only accesses that are
-      // outside of the returned struct type can trap.
-      if (!A->hasStructRetAttr())
-        return false;
-
-      Type *StructTy = A->getParamStructRetType();
-      if (!StructTy->isSized()) {
-        // The call may never return and hence the copy-instruction may never
-        // be executed, and therefore it's not safe to say "the destination
-        // has at least <cpyLen> bytes, as implied by the copy-instruction",
-        return false;
-      }
-
-      uint64_t destSize = DL.getTypeAllocSize(StructTy);
-      if (destSize < srcSize)
-        return false;
-    }
-  } else {
+  if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
+                                          DL, C, DT))
     return false;
-  }
 
   // Make sure that nothing can observe cpyDest being written early. There are
   // a number of cases to consider:
@@ -837,6 +807,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
   //     guaranteed to be executed if C is. As it is a non-atomic access, it
   //     renders accesses from other threads undefined.
   //     TODO: This is currently not checked.
+  // TODO: Check underlying object, so we can look through GEPs.
   if (!isa<AllocaInst>(cpyDest)) {
     assert(C->getParent() == cpyStore->getParent() &&
            "call and copy must be in the same block");
@@ -893,6 +864,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
 
   // Since we're changing the parameter to the callsite, we need to make sure
   // that what would be the new parameter dominates the callsite.
+  // TODO: Support moving instructions like GEPs upwards.
   if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
     if (!DT->dominates(cpyDestInst, C))
       return false;

diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll
index 3b495e5f3fa7..90f1833a2d5c 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll
@@ -110,8 +110,9 @@ define void @dest_is_gep_nounwind_call() {
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [8 x i8], align 1
 ; CHECK-NEXT:    [[SRC_I8:%.*]] = bitcast [8 x i8]* [[SRC]] to i8*
 ; CHECK-NEXT:    [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8
-; CHECK-NEXT:    call void @accept_ptr(i8* [[SRC_I8]]) [[ATTR3:#.*]]
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 8, i1 false)
+; CHECK-NEXT:    [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8]] to [8 x i8]*
+; CHECK-NEXT:    [[DEST_I812:%.*]] = bitcast [8 x i8]* [[DEST_I81]] to i8*
+; CHECK-NEXT:    call void @accept_ptr(i8* [[DEST_I812]]) [[ATTR3:#.*]]
 ; CHECK-NEXT:    ret void
 ;
   %dest = alloca [16 x i8]

