[llvm] 53a51ac - Revert "[MemCpyOpt] Make capture check during call slot optimization more precise"

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 18 08:42:32 PST 2022


Author: Hans Wennborg
Date: 2022-01-18T17:41:49+01:00
New Revision: 53a51acc361a6b20b23b032226b0a7b124465cf7

URL: https://github.com/llvm/llvm-project/commit/53a51acc361a6b20b23b032226b0a7b124465cf7
DIFF: https://github.com/llvm/llvm-project/commit/53a51acc361a6b20b23b032226b0a7b124465cf7.diff

LOG: Revert "[MemCpyOpt] Make capture check during call slot optimization more precise"

This casued a miscompile due to call slot optimization replacing a call
argument without considering the call's !noalias metadata, see discussion on
the code review.

> Call slot optimization is currently supposed to be prevented if
> the call can capture the source pointer. Due to an implementation
> bug, this check currently doesn't trigger if a bitcast of the source
> pointer is passed instead. I'm somewhat afraid of the fallout of
> fixing this bug (due to heavy reliance on call slot optimization
> in rust), so I'd like to strengthen the capture reasoning a bit first.
>
> In particular, I believe that the capture is fine as long as a)
> the call itself cannot depend on the pointer identity, because
> neither dest has been captured before/at nor src before the
> call and b) there is no potential use of the captured pointer
> before the lifetime of the source alloca ends, either due to
> lifetime.end or a return from a function. At that point the
> potentially captured pointer becomes dangling.
>
> Differential Revision: https://reviews.llvm.org/D115615

Also reverting the dependent commit:

> [MemCpyOpt] Look through pointer casts when checking capture
>
> The user scanning loop above looks through pointer casts, so we
> also need to strip pointer casts in the capture check. Previously
> the source was incorrectly considered not captured if a bitcast
> was passed to the call.

This reverts commit 487a34ed9d7d24a7b1fb388c8856c784a459b22b
and 00e6869463ae6023d0d48f30de8511d6d748b14f.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
    llvm/test/Transforms/MemCpyOpt/callslot.ll
    llvm/test/Transforms/MemCpyOpt/capturing-func.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 19e2efd4824b5..dd81c6ffcb530 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -20,7 +20,6 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -946,57 +945,12 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
       return false;
   }
 
-  // Check whether src is captured by the called function, in which case there
-  // may be further indirect uses of src.
-  bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
-    return U->stripPointerCasts() == cpySrc &&
-           !C->doesNotCapture(C->getArgOperandNo(&U));
-  });
-
-  // If src is captured, then check whether there are any potential uses of
-  // src through the captured pointer before the lifetime of src ends, either
-  // due to a lifetime.end or a return from the function.
-  if (SrcIsCaptured) {
-    // Check that dest is not captured before/at the call. We have already
-    // checked that src is not captured before it. If either had been captured,
-    // then the call might be comparing the argument against the captured dest
-    // or src pointer.
-    Value *DestObj = getUnderlyingObject(cpyDest);
-    if (!isIdentifiedFunctionLocal(DestObj) ||
-        PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
-                                   /* StoreCaptures */ true, C, DT,
-                                   /* IncludeI */ true))
+  // Check that src isn't captured by the called function since the
+  // transformation can cause aliasing issues in that case.
+  for (unsigned ArgI = 0, E = C->arg_size(); ArgI != E; ++ArgI)
+    if (C->getArgOperand(ArgI) == cpySrc && !C->doesNotCapture(ArgI))
       return false;
 
-    MemoryLocation SrcLoc =
-        MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
-    for (Instruction &I :
-         make_range(++C->getIterator(), C->getParent()->end())) {
-      // Lifetime of srcAlloca ends at lifetime.end.
-      if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
-        if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
-            II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
-            cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
-          break;
-      }
-
-      // Lifetime of srcAlloca ends at return.
-      if (isa<ReturnInst>(&I))
-        break;
-
-      // Ignore the direct read of src in the load.
-      if (&I == cpyLoad)
-        continue;
-
-      // Check whether this instruction may mod/ref src through the captured
-      // pointer (we have already any direct mod/refs in the loop above).
-      // Also bail if we hit a terminator, as we don't want to scan into other
-      // blocks.
-      if (isModOrRefSet(AA->getModRefInfo(&I, SrcLoc)) || I.isTerminator())
-        return false;
-    }
-  }
-
   // Since we're changing the parameter to the callsite, we need to make sure
   // that what would be the new parameter dominates the callsite.
   if (!DT->dominates(cpyDest, C)) {

diff  --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll
index 5cb57022198fa..c118646669ea4 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll
@@ -172,7 +172,7 @@ define void @capture_before_call_argmemonly() {
 ; CHECK-NEXT:    [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8*
 ; CHECK-NEXT:    call void @accept_ptr(i8* [[DEST_I8]])
 ; CHECK-NEXT:    [[DEST1:%.*]] = bitcast [16 x i8]* [[DEST]] to i8*
-; CHECK-NEXT:    call void @accept_ptr(i8* nocapture [[DEST1]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT:    call void @accept_ptr(i8* [[DEST1]]) #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   %dest = alloca [16 x i8]
@@ -180,7 +180,7 @@ define void @capture_before_call_argmemonly() {
   %dest.i8 = bitcast [16 x i8]* %dest to i8*
   %src.i8 = bitcast [16 x i8]* %src to i8*
   call void @accept_ptr(i8* %dest.i8) ; capture
-  call void @accept_ptr(i8* nocapture %src.i8) argmemonly
+  call void @accept_ptr(i8* %src.i8) argmemonly
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false)
   ret void
 }
@@ -193,7 +193,7 @@ define void @capture_before_call_argmemonly_nounwind() {
 ; CHECK-NEXT:    [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8*
 ; CHECK-NEXT:    call void @accept_ptr(i8* [[DEST_I8]])
 ; CHECK-NEXT:    [[DEST1:%.*]] = bitcast [16 x i8]* [[DEST]] to i8*
-; CHECK-NEXT:    call void @accept_ptr(i8* nocapture [[DEST1]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    call void @accept_ptr(i8* [[DEST1]]) #[[ATTR5:[0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   %dest = alloca [16 x i8]
@@ -202,7 +202,7 @@ define void @capture_before_call_argmemonly_nounwind() {
   %src.i8 = bitcast [16 x i8]* %src to i8*
   call void @accept_ptr(i8* %dest.i8) ; capture
   ; NB: argmemonly currently implies willreturn.
-  call void @accept_ptr(i8* nocapture %src.i8) argmemonly nounwind
+  call void @accept_ptr(i8* %src.i8) argmemonly nounwind
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false)
   ret void
 }
@@ -215,7 +215,7 @@ define void @capture_before_call_argmemonly_nounwind_willreturn() {
 ; CHECK-NEXT:    [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8*
 ; CHECK-NEXT:    call void @accept_ptr(i8* [[DEST_I8]])
 ; CHECK-NEXT:    [[DEST1:%.*]] = bitcast [16 x i8]* [[DEST]] to i8*
-; CHECK-NEXT:    call void @accept_ptr(i8* nocapture [[DEST1]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT:    call void @accept_ptr(i8* [[DEST1]]) #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   %dest = alloca [16 x i8]
@@ -223,7 +223,7 @@ define void @capture_before_call_argmemonly_nounwind_willreturn() {
   %dest.i8 = bitcast [16 x i8]* %dest to i8*
   %src.i8 = bitcast [16 x i8]* %src to i8*
   call void @accept_ptr(i8* %dest.i8) ; capture
-  call void @accept_ptr(i8* nocapture %src.i8) argmemonly nounwind willreturn
+  call void @accept_ptr(i8* %src.i8) argmemonly nounwind willreturn
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false)
   ret void
 }

diff  --git a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
index 84c1446ae0a4b..aacf261918cf9 100644
--- a/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
+++ b/llvm/test/Transforms/MemCpyOpt/capturing-func.ll
@@ -28,14 +28,15 @@ define void @test() {
 }
 
 ; Same as previous test, but with a bitcasted argument.
+; TODO: Call slot optimization should not be applied here.
 define void @test_bitcast() {
 ; CHECK-LABEL: define {{[^@]+}}@test_bitcast() {
 ; CHECK-NEXT:    [[PTR1:%.*]] = alloca [2 x i8], align 1
 ; CHECK-NEXT:    [[PTR2:%.*]] = alloca [2 x i8], align 1
 ; CHECK-NEXT:    [[PTR1_CAST:%.*]] = bitcast [2 x i8]* [[PTR1]] to i8*
 ; CHECK-NEXT:    [[PTR2_CAST:%.*]] = bitcast [2 x i8]* [[PTR2]] to i8*
-; CHECK-NEXT:    call void @foo(i8* [[PTR2_CAST]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1_CAST]], i8* [[PTR2_CAST]], i32 2, i1 false)
+; CHECK-NEXT:    [[PTR11:%.*]] = bitcast [2 x i8]* [[PTR1]] to i8*
+; CHECK-NEXT:    call void @foo(i8* [[PTR11]])
 ; CHECK-NEXT:    call void @foo(i8* [[PTR1_CAST]])
 ; CHECK-NEXT:    ret void
 ;
@@ -56,7 +57,8 @@ define void @test_lifetime_end() {
 ; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 1, i8* [[PTR2]])
-; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
+; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 1, i8* [[PTR2]])
 ; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
 ; CHECK-NEXT:    ret void
@@ -99,7 +101,8 @@ define void @test_function_end() {
 ; CHECK-LABEL: define {{[^@]+}}@test_function_end() {
 ; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
+; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %ptr1 = alloca i8


        


More information about the llvm-commits mailing list