[llvm] r348645 - [MemCpyOpt] memset->memcpy forwarding with undef tail

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 13 03:31:05 PST 2018


Hey David,

Thanks a lot for creating a test case and taking care of the revert. I'll
look into what's going wrong here...

Nikita

On Thu, Dec 13, 2018 at 4:04 AM David Jones <dlj at google.com> wrote:

> Here is a test case that demonstrates the incorrectly-truncated memset:
>
> =====
> %"struct.T1" = type { i32, i32, i8, i8, i8, i32, i32, double, double,
> %"class.T3", %"class.T3" }
> %"class.T2" = type { i8* }
> %"class.T3" = type { i8* }
>
> ; Function Attrs: inlinehint nounwind uwtable
> define linkonce_odr dso_local void @f(%"class.T2"* noalias sret) comdat {
>   %2 = alloca %"struct.T1", align 8
>   %3 = bitcast %"struct.T1"* %2 to i8*
>
>   %4 = getelementptr inbounds %"struct.T1", %"struct.T1"* %2, i64 0, i32 5
>   store i32 0, i32* %4, align 4
>   %5 = getelementptr inbounds %"struct.T1", %"struct.T1"* %2, i64 0, i32 6
>   store i32 0, i32* %5, align 8
>
>   %6 = getelementptr inbounds %"struct.T1", %"struct.T1"* %2, i64 0, i32 7
>   %7 = bitcast double* %6 to i8*
>   call void @llvm.memset.p0i8.i64(i8* nonnull align 8 %7, i8 0, i64 64, i1
> false) #3
>
>   %8 = bitcast %"struct.T1"* %2 to i8*
>   call void @llvm.memset.p0i8.i64(i8* nonnull align 8 %8, i8 0, i64 11, i1
> false)
>
>   %9 = tail call i8* @_Znwm(i64 88) #19
>   call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %9, i8* nonnull
> align 8 %3, i64 40, i1 false) #3
>
>   %10 = getelementptr inbounds i8, i8* %9, i64 40
>   %11 = bitcast i8* %10 to %"class.T3"*
>   %12 = getelementptr inbounds %"struct.T1", %"struct.T1"* %2, i64 0, i32 9
>   call void @ext_f1(%"class.T3"* nonnull %11, %"class.T3"* nonnull
> dereferenceable(24) %12) #3
>
>   %13 = ptrtoint i8* %9 to i64
>   %14 = bitcast %"class.T2"* %0 to i64*
>   store i64 %13, i64* %14, align 8
>
>   ret void
> }
>
> declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8*
> nocapture readonly, i64, i1)
> declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)
> declare noalias nonnull i8* @_Znwm(i64)
> declare void @ext_f1(%"class.T3"*, %"class.T3"* dereferenceable(24))
> unnamed_addr
>
> $f = comdat any
>
> @keep = dso_local unnamed_addr constant { i8* } { i8* bitcast (void
> (%"class.T2"*)* @f to i8*) }, align 8
> =====
>
> Here is the problematic output:
>
> =====
> $ diff -u --label before <( $OPT_r348577 /tmp/memset-struct.ll -O2 -f |
> ./llvm-dis -f - ) --label after <( $OPT_r348661 /tmp/memset-struct.ll -O2
> -f | ./llvm-dis -f - )
> --- before
> +++ after
> @@ -21,7 +21,7 @@
>    call void @llvm.memset.p0i8.i64(i8* nonnull align 8 %7, i8 0, i64 64,
> i1 false)
>    call void @llvm.memset.p0i8.i64(i8* nonnull align 8 %3, i8 0, i64 11,
> i1 false)
>    %8 = tail call i8* @_Znwm(i64 88)
> -  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %8, i8*
> nonnull align 8 %3, i64 40, i1 false)
> +  call void @llvm.memset.p0i8.i64(i8* nonnull align 8 %8, i8 0, i64 11,
> i1 false)
>    %9 = getelementptr inbounds i8, i8* %8, i64 40
>    %10 = bitcast i8* %9 to %class.T3*
>    %11 = getelementptr inbounds %struct.T1, %struct.T1* %2, i64 0, i32 9
> @@ -33,9 +33,6 @@
>  }
>
>  ; Function Attrs: argmemonly nounwind
> -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8*
> nocapture readonly, i64, i1) #0
> -
> -; Function Attrs: argmemonly nounwind
>  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)
> #0
>
>  declare noalias nonnull i8* @_Znwm(i64) local_unnamed_addr
> =====
>
> Since I haven't heard anything back since my last message, I will go ahead
> and revert this patch. (Sorry about that... :-/ )
>
> On Wed, Dec 12, 2018 at 4:21 PM David Jones <dlj at google.com> wrote:
>
>> Hello,
>>
>> I'm seeing miscompiles at -O2 that bisect to this revision. I'm working
>> on reducing a test case now... it looks to my (admittedly unfamiliar) eyes
>> like this revision may not handle structs correctly if they are
>> alloca'ed.
>>
>> I'll update when I have more (or a test case).
>>
>> --dlj
>>
>> On Fri, Dec 7, 2018 at 1:19 PM Nikita Popov via llvm-commits <
>> llvm-commits at lists.llvm.org> wrote:
>>
>>> Author: nikic
>>> Date: Fri Dec  7 13:16:58 2018
>>> New Revision: 348645
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=348645&view=rev
>>> Log:
>>> [MemCpyOpt] memset->memcpy forwarding with undef tail
>>>
>>> Currently memcpyopt optimizes cases like
>>>
>>>     memset(a, byte, N);
>>>     memcpy(b, a, M);
>>>
>>> to
>>>
>>>     memset(a, byte, N);
>>>     memset(b, byte, M);
>>>
>>> if M <= N. Often this allows further simplifications down the line,
>>> which drop the first memset entirely.
>>>
>>> This patch extends this optimization for the case where M > N, but we
>>> know that the bytes a[N..M] are undef due to alloca/lifetime.start.
>>>
>>> This situation arises relatively often for Rust code, because Rust does
>>> not initialize trailing structure padding and loves to insert redundant
>>> memcpys. This also fixes https://bugs.llvm.org/show_bug.cgi?id=39844.
>>>
>>> For the implementation, I'm reusing a bit of code from a similar existing
>>> optimization (direct memcpy of undef). I've also added memset support to
>>> MemDepAnalysis GetLocation. getPointerDependencyFrom could be used
>>> instead, but it seems to make more sense to add this to GetLocation and
>>> thus make the computation cacheable.
>>>
>>> Differential Revision: https://reviews.llvm.org/D55120
>>>
>>> Modified:
>>>     llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
>>>     llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
>>>     llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
>>>
>>> Modified: llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp?rev=348645&r1=348644&r2=348645&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp (original)
>>> +++ llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp Fri Dec  7
>>> 13:16:58 2018
>>> @@ -154,6 +154,12 @@ static ModRefInfo GetLocation(const Inst
>>>      return ModRefInfo::Mod;
>>>    }
>>>
>>> +  if (const MemSetInst *MI = dyn_cast<MemSetInst>(Inst)) {
>>> +    Loc = MemoryLocation::getForDest(MI);
>>> +    // Conversatively assume ModRef for volatile memset.
>>> +    return MI->isVolatile() ? ModRefInfo::ModRef : ModRefInfo::Mod;
>>> +  }
>>> +
>>>    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
>>>      switch (II->getIntrinsicID()) {
>>>      case Intrinsic::lifetime_start:
>>>
>>> Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=348645&r1=348644&r2=348645&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
>>> +++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Fri Dec  7
>>> 13:16:58 2018
>>> @@ -1144,6 +1144,21 @@ bool MemCpyOptPass::processMemSetMemCpyD
>>>    return true;
>>>  }
>>>
>>> +/// Determine whether the instruction has undefined content for the
>>> given Size,
>>> +/// either because it was freshly alloca'd or started its lifetime.
>>> +static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
>>> +  if (isa<AllocaInst>(I))
>>> +    return true;
>>> +
>>> +  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
>>> +    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
>>> +      if (ConstantInt *LTSize =
>>> dyn_cast<ConstantInt>(II->getArgOperand(0)))
>>> +        if (LTSize->getZExtValue() >= Size->getZExtValue())
>>> +          return true;
>>> +
>>> +  return false;
>>> +}
>>> +
>>>  /// Transform memcpy to memset when its source was just memset.
>>>  /// In other words, turn:
>>>  /// \code
>>> @@ -1167,12 +1182,23 @@ bool MemCpyOptPass::performMemCpyToMemSe
>>>    if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
>>>      return false;
>>>
>>> -  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
>>> +  // A known memset size is required.
>>>    ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
>>> +  if (!MemSetSize)
>>> +    return false;
>>> +
>>>    // Make sure the memcpy doesn't read any more than what the memset
>>> wrote.
>>>    // Don't worry about sizes larger than i64.
>>> -  if (!MemSetSize || CopySize->getZExtValue() >
>>> MemSetSize->getZExtValue())
>>> -    return false;
>>> +  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
>>> +  if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
>>> +    // If the memcpy is larger than the memset, but the memory was
>>> undef prior
>>> +    // to the memset, we can just ignore the tail.
>>> +    MemDepResult DepInfo = MD->getDependency(MemSet);
>>> +    if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(),
>>> CopySize))
>>> +      CopySize = MemSetSize;
>>> +    else
>>> +      return false;
>>> +  }
>>>
>>>    IRBuilder<> Builder(MemCpy);
>>>    Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
>>> @@ -1252,19 +1278,7 @@ bool MemCpyOptPass::processMemCpy(MemCpy
>>>      if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
>>>        return processMemCpyMemCpyDependence(M, MDep);
>>>    } else if (SrcDepInfo.isDef()) {
>>> -    Instruction *I = SrcDepInfo.getInst();
>>> -    bool hasUndefContents = false;
>>> -
>>> -    if (isa<AllocaInst>(I)) {
>>> -      hasUndefContents = true;
>>> -    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
>>> -      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
>>> -        if (ConstantInt *LTSize =
>>> dyn_cast<ConstantInt>(II->getArgOperand(0)))
>>> -          if (LTSize->getZExtValue() >= CopySize->getZExtValue())
>>> -            hasUndefContents = true;
>>> -    }
>>> -
>>> -    if (hasUndefContents) {
>>> +    if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
>>>        MD->removeInstruction(M);
>>>        M->eraseFromParent();
>>>        ++NumMemCpyInstr;
>>>
>>> Modified: llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll?rev=348645&r1=348644&r2=348645&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
>>> (original)
>>> +++ llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll Fri
>>> Dec  7 13:16:58 2018
>>> @@ -12,7 +12,7 @@ define void @test_alloca(i8* %result) {
>>>  ; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
>>>  ; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
>>>  ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8
>>> 0, i64 12, i1 false)
>>> -; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8*
>>> [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
>>> +; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8
>>> 0, i64 12, i1 false)
>>>  ; CHECK-NEXT:    ret void
>>>  ;
>>>    %a = alloca %T, align 8
>>> @@ -28,7 +28,7 @@ define void @test_alloca_with_lifetimes(
>>>  ; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
>>>  ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
>>>  ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8
>>> 0, i64 12, i1 false)
>>> -; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8*
>>> [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
>>> +; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8
>>> 0, i64 12, i1 false)
>>>  ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
>>>  ; CHECK-NEXT:    ret void
>>>  ;
>>> @@ -46,7 +46,7 @@ define void @test_malloc_with_lifetimes(
>>>  ; CHECK-NEXT:    [[A:%.*]] = call i8* @malloc(i64 16)
>>>  ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
>>>  ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8
>>> 0, i64 12, i1 false)
>>> -; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8*
>>> [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
>>> +; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8
>>> 0, i64 12, i1 false)
>>>  ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
>>>  ; CHECK-NEXT:    call void @free(i8* [[A]])
>>>  ; CHECK-NEXT:    ret void
>>> @@ -98,7 +98,7 @@ define void @test_volatile_memset(i8* %r
>>>  ; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
>>>  ; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
>>>  ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8
>>> 0, i64 12, i1 true)
>>> -; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8*
>>> [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
>>> +; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8
>>> 0, i64 12, i1 false)
>>>  ; CHECK-NEXT:    ret void
>>>  ;
>>>    %a = alloca %T, align 8
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at lists.llvm.org
>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>>
>>
> On Wed, Dec 12, 2018 at 4:21 PM David Jones <dlj at google.com> wrote:
>
>> Hello,
>>
>> I'm seeing miscompiles at -O2 that bisect to this revision. I'm working
>> on reducing a test case now... it looks to my (admittedly unfamiliar) eyes
>> like this revision may not handle structs correctly if they are
>> alloca'ed.
>>
>> I'll update when I have more (or a test case).
>>
>> --dlj
>>
>> On Fri, Dec 7, 2018 at 1:19 PM Nikita Popov via llvm-commits <
>> llvm-commits at lists.llvm.org> wrote:
>>
>>> Author: nikic
>>> Date: Fri Dec  7 13:16:58 2018
>>> New Revision: 348645
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=348645&view=rev
>>> Log:
>>> [MemCpyOpt] memset->memcpy forwarding with undef tail
>>>
>>> Currently memcpyopt optimizes cases like
>>>
>>>     memset(a, byte, N);
>>>     memcpy(b, a, M);
>>>
>>> to
>>>
>>>     memset(a, byte, N);
>>>     memset(b, byte, M);
>>>
>>> if M <= N. Often this allows further simplifications down the line,
>>> which drop the first memset entirely.
>>>
>>> This patch extends this optimization for the case where M > N, but we
>>> know that the bytes a[N..M] are undef due to alloca/lifetime.start.
>>>
>>> This situation arises relatively often for Rust code, because Rust does
>>> not initialize trailing structure padding and loves to insert redundant
>>> memcpys. This also fixes https://bugs.llvm.org/show_bug.cgi?id=39844.
>>>
>>> For the implementation, I'm reusing a bit of code from a similar existing
>>> optimization (direct memcpy of undef). I've also added memset support to
>>> MemDepAnalysis GetLocation. getPointerDependencyFrom could be used
>>> instead, but it seems to make more sense to add this to GetLocation and
>>> thus make the computation cacheable.
>>>
>>> Differential Revision: https://reviews.llvm.org/D55120
>>>
>>> Modified:
>>>     llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
>>>     llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
>>>     llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
>>>
>>> Modified: llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp?rev=348645&r1=348644&r2=348645&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp (original)
>>> +++ llvm/trunk/lib/Analysis/MemoryDependenceAnalysis.cpp Fri Dec  7
>>> 13:16:58 2018
>>> @@ -154,6 +154,12 @@ static ModRefInfo GetLocation(const Inst
>>>      return ModRefInfo::Mod;
>>>    }
>>>
>>> +  if (const MemSetInst *MI = dyn_cast<MemSetInst>(Inst)) {
>>> +    Loc = MemoryLocation::getForDest(MI);
>>> +    // Conversatively assume ModRef for volatile memset.
>>> +    return MI->isVolatile() ? ModRefInfo::ModRef : ModRefInfo::Mod;
>>> +  }
>>> +
>>>    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
>>>      switch (II->getIntrinsicID()) {
>>>      case Intrinsic::lifetime_start:
>>>
>>> Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=348645&r1=348644&r2=348645&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
>>> +++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Fri Dec  7
>>> 13:16:58 2018
>>> @@ -1144,6 +1144,21 @@ bool MemCpyOptPass::processMemSetMemCpyD
>>>    return true;
>>>  }
>>>
>>> +/// Determine whether the instruction has undefined content for the
>>> given Size,
>>> +/// either because it was freshly alloca'd or started its lifetime.
>>> +static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
>>> +  if (isa<AllocaInst>(I))
>>> +    return true;
>>> +
>>> +  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
>>> +    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
>>> +      if (ConstantInt *LTSize =
>>> dyn_cast<ConstantInt>(II->getArgOperand(0)))
>>> +        if (LTSize->getZExtValue() >= Size->getZExtValue())
>>> +          return true;
>>> +
>>> +  return false;
>>> +}
>>> +
>>>  /// Transform memcpy to memset when its source was just memset.
>>>  /// In other words, turn:
>>>  /// \code
>>> @@ -1167,12 +1182,23 @@ bool MemCpyOptPass::performMemCpyToMemSe
>>>    if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
>>>      return false;
>>>
>>> -  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
>>> +  // A known memset size is required.
>>>    ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
>>> +  if (!MemSetSize)
>>> +    return false;
>>> +
>>>    // Make sure the memcpy doesn't read any more than what the memset
>>> wrote.
>>>    // Don't worry about sizes larger than i64.
>>> -  if (!MemSetSize || CopySize->getZExtValue() >
>>> MemSetSize->getZExtValue())
>>> -    return false;
>>> +  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
>>> +  if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
>>> +    // If the memcpy is larger than the memset, but the memory was
>>> undef prior
>>> +    // to the memset, we can just ignore the tail.
>>> +    MemDepResult DepInfo = MD->getDependency(MemSet);
>>> +    if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(),
>>> CopySize))
>>> +      CopySize = MemSetSize;
>>> +    else
>>> +      return false;
>>> +  }
>>>
>>>    IRBuilder<> Builder(MemCpy);
>>>    Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
>>> @@ -1252,19 +1278,7 @@ bool MemCpyOptPass::processMemCpy(MemCpy
>>>      if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
>>>        return processMemCpyMemCpyDependence(M, MDep);
>>>    } else if (SrcDepInfo.isDef()) {
>>> -    Instruction *I = SrcDepInfo.getInst();
>>> -    bool hasUndefContents = false;
>>> -
>>> -    if (isa<AllocaInst>(I)) {
>>> -      hasUndefContents = true;
>>> -    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
>>> -      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
>>> -        if (ConstantInt *LTSize =
>>> dyn_cast<ConstantInt>(II->getArgOperand(0)))
>>> -          if (LTSize->getZExtValue() >= CopySize->getZExtValue())
>>> -            hasUndefContents = true;
>>> -    }
>>> -
>>> -    if (hasUndefContents) {
>>> +    if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
>>>        MD->removeInstruction(M);
>>>        M->eraseFromParent();
>>>        ++NumMemCpyInstr;
>>>
>>> Modified: llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll?rev=348645&r1=348644&r2=348645&view=diff
>>>
>>> ==============================================================================
>>> --- llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
>>> (original)
>>> +++ llvm/trunk/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll Fri
>>> Dec  7 13:16:58 2018
>>> @@ -12,7 +12,7 @@ define void @test_alloca(i8* %result) {
>>>  ; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
>>>  ; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
>>>  ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8
>>> 0, i64 12, i1 false)
>>> -; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8*
>>> [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
>>> +; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8
>>> 0, i64 12, i1 false)
>>>  ; CHECK-NEXT:    ret void
>>>  ;
>>>    %a = alloca %T, align 8
>>> @@ -28,7 +28,7 @@ define void @test_alloca_with_lifetimes(
>>>  ; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
>>>  ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
>>>  ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8
>>> 0, i64 12, i1 false)
>>> -; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8*
>>> [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
>>> +; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8
>>> 0, i64 12, i1 false)
>>>  ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
>>>  ; CHECK-NEXT:    ret void
>>>  ;
>>> @@ -46,7 +46,7 @@ define void @test_malloc_with_lifetimes(
>>>  ; CHECK-NEXT:    [[A:%.*]] = call i8* @malloc(i64 16)
>>>  ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
>>>  ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8
>>> 0, i64 12, i1 false)
>>> -; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8*
>>> [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
>>> +; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8
>>> 0, i64 12, i1 false)
>>>  ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
>>>  ; CHECK-NEXT:    call void @free(i8* [[A]])
>>>  ; CHECK-NEXT:    ret void
>>> @@ -98,7 +98,7 @@ define void @test_volatile_memset(i8* %r
>>>  ; CHECK-NEXT:    [[A:%.*]] = alloca [[T:%.*]], align 8
>>>  ; CHECK-NEXT:    [[B:%.*]] = bitcast %T* [[A]] to i8*
>>>  ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8
>>> 0, i64 12, i1 true)
>>> -; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8*
>>> [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
>>> +; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8
>>> 0, i64 12, i1 false)
>>>  ; CHECK-NEXT:    ret void
>>>  ;
>>>    %a = alloca %T, align 8
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at lists.llvm.org
>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>>
>>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181213/6390fc66/attachment.html>


More information about the llvm-commits mailing list