[llvm] 2fea5d5 - [InstCombine] tmp alloca bypass: ensure that the replacement dominates all alloca uses

Wed Apr 14 08:06:14 PDT 2021

On Wed, Apr 14, 2021 at 5:53 PM Benjamin Kramer <benny.kra at gmail.com> wrote:
>
> This still wasn't enough, see
> cf4161673c7e7c7c57d8115468bfcc9988f43d36.
Thanks, i see.
I was initially trying to avoid such a hammer approach.

> I don't know if this transformation can be salvaged,
> it's quite annoying to do non-local changes in Instcombine.
Yep.
We could perhaps at least skip past all bitcasts on the source,
in some cases that will yield a non-instruction.

But as for the users of an alloca, yeah,
i'm not really seeing any good approach,
and it's not possible to support all cases.

Roman

> On Wed, Apr 14, 2021 at 12:04 PM Roman Lebedev via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
> >
> >
> > Author: Roman Lebedev
> > Date: 2021-04-14T13:04:12+03:00
> > New Revision: 2fea5d5d4accf3490854b064a51d1db049b1de64
> >
> > URL: https://github.com/llvm/llvm-project/commit/2fea5d5d4accf3490854b064a51d1db049b1de64
> > DIFF: https://github.com/llvm/llvm-project/commit/2fea5d5d4accf3490854b064a51d1db049b1de64.diff
> >
> > LOG: [InstCombine] tmp alloca bypass: ensure that the replacement dominates all alloca uses
> >
> > After 077bff39d46364035a5dcfa32fc69910ad0975d0,
> > isDereferenceableForAllocaSize() can recurse into selects,
> > which is causing a problem for the new test case,
> > reduced from https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20210412/904154.html
> > because the replacement (the select) is defined after the first use
> > of an alloca, so we'd end up with a verifier error.
> >
> > Now, this new check is too restrictive.
> > We likely can handle *some* cases, by trying to sink all uses of an alloca
> > to after the the def.
> >
> > Added:
> >     llvm/test/Transforms/InstCombine/tmp-alloca-bypass.ll
> >
> > Modified:
> >     llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
> >     llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
> >
> > Removed:
> >
> >
> >
> > ################################################################################
> > diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
> > index d1848bd78f679..3ca54958cc79f 100644
> > --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
> > +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
> > @@ -406,7 +406,11 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
> >      Align SourceAlign = getOrEnforceKnownAlignment(
> >        TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
> >      if (AllocaAlign <= SourceAlign &&
> > -        isDereferenceableForAllocaSize(TheSrc, &AI, DL)) {
> > +        isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
> > +        (!isa<Instruction>(TheSrc) || all_of(AI.users(), [&](const User *U) {
> > +          return DT.dominates(cast<Instruction>(TheSrc), cast<Instruction>(U));
> > +        }))) {
> > +      // FIXME: can the dominance restriction be relaxed by sinking instrns?
> >        LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
> >        LLVM_DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
> >        unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
> >
> > diff  --git a/llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll b/llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
> > index 4e7e8256f2ebe..c6028267aae6f 100644
> > --- a/llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
> > +++ b/llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
> > @@ -135,10 +135,12 @@ define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca_too_many_byte
> >  ; Simple memcpy to alloca from constant address space intrinsic call
> >  define amdgpu_kernel void @memcpy_constant_intrinsic_ptr_to_alloca(i8 addrspace(1)* %out, i32 %idx) {
> >  ; CHECK-LABEL: @memcpy_constant_intrinsic_ptr_to_alloca(
> > +; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
> > +; CHECK-NEXT:    [[ALLOCA_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 0
> >  ; CHECK-NEXT:    [[KERNARG_SEGMENT_PTR:%.*]] = call align 16 dereferenceable(32) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
> > -; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
> > -; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, i8 addrspace(4)* [[KERNARG_SEGMENT_PTR]], i64 [[TMP1]]
> > -; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1
> > +; CHECK-NEXT:    call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* noundef align 4 dereferenceable(32) [[ALLOCA_CAST]], i8 addrspace(4)* noundef align 16 dereferenceable(32) [[KERNARG_SEGMENT_PTR]], i64 32, i1 false)
> > +; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
> > +; CHECK-NEXT:    [[LOAD:%.*]] = load i8, i8 addrspace(5)* [[GEP]], align 1
> >  ; CHECK-NEXT:    store i8 [[LOAD]], i8 addrspace(1)* [[OUT:%.*]], align 1
> >  ; CHECK-NEXT:    ret void
> >  ;
> >
> > diff  --git a/llvm/test/Transforms/InstCombine/tmp-alloca-bypass.ll b/llvm/test/Transforms/InstCombine/tmp-alloca-bypass.ll
> > new file mode 100644
> > index 0000000000000..9a87e8a68f8ed
> > --- /dev/null
> > +++ b/llvm/test/Transforms/InstCombine/tmp-alloca-bypass.ll
> > @@ -0,0 +1,35 @@
> > +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
> > +; RUN: opt < %s -instcombine -S | FileCheck %s
> > +
> > +; See https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20210412/904154.html
> > +; When replacing an allocation that is only modified by a memcpy/memmove from
> > +; a constant whose alignment is equal to or exceeds that of the allocation,
> > +; we also need to ensure that we actually can replace all uses of an alloca
> > +; with said constant. This matters because it could be e.g. a select between
> > +; two constants, that happens after the first use of an alloca.
> > +
> > +%t0 = type { i8*, i64 }
> > +
> > + at g0 = external constant %t0
> > + at g1 = external constant %t0
> > +define void @test(i8*%out) {
> > +; CHECK-LABEL: @test(
> > +; CHECK-NEXT:    [[I0:%.*]] = alloca [[T0:%.*]], align 8
> > +; CHECK-NEXT:    [[I1:%.*]] = bitcast %t0* [[I0]] to i8*
> > +; CHECK-NEXT:    [[I2:%.*]] = call i1 @get_cond()
> > +; CHECK-NEXT:    [[I3:%.*]] = select i1 [[I2]], i8* bitcast (%t0* @g0 to i8*), i8* bitcast (%t0* @g1 to i8*)
> > +; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(16) [[I1]], i8* noundef nonnull align 8 dereferenceable(16) [[I3]], i64 16, i1 false)
> > +; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(16) [[OUT:%.*]], i8* noundef nonnull align 8 dereferenceable(16) [[I1]], i64 16, i1 false)
> > +; CHECK-NEXT:    ret void
> > +;
> > +  %i0 = alloca %t0
> > +  %i1 = bitcast %t0* %i0 to i8*
> > +  %i2 = call i1 @get_cond()
> > +  %i3 = select i1 %i2, i8* bitcast (%t0* @g0 to i8*), i8* bitcast (%t0* @g1 to i8*)
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %i1, i8* %i3, i64 16, i1 false)
> > +  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %i1, i64 16, i1 false)
> > +  ret void
> > +}
> > +
> > +declare i1 @get_cond()
> > +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
> >
> >
> >
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at lists.llvm.org
> > https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits