Optimize load from aggregate stores

Tue Feb 11 17:19:11 PST 2014

Hi Adrian. I assumed this thread was kind of dead and submitted a better
patch (you'll find it attached).

This updated patch test more in depth the struct case. I don't have an
array test case ready. I can add one if that is really necessary.

2014-02-11 15:04 GMT-08:00 Adrian Prantl <aprantl at apple.com>:

> Thanks for adding the test! It looks like this only tests the struct case?
> There are probably others more qualified to comment on the GVN-related
> changes.
>
> -- adrian
>
> On Feb 7, 2014, at 21:02, deadal nix <deadalnix at gmail.com> wrote:
>
> > Here is the updated patch with test case included:
> >
> > diff --git a/lib/Transforms/Scalar/GVN.cpp
> b/lib/Transforms/Scalar/GVN.cpp
> > index f350b9b..c8a0ec0 100644
> > --- a/lib/Transforms/Scalar/GVN.cpp
> > +++ b/lib/Transforms/Scalar/GVN.cpp
> > @@ -996,11 +996,6 @@ static int AnalyzeLoadFromClobberingWrite(Type
> *LoadTy, Value *LoadPtr,
> >  static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
> >                                            StoreInst *DepSI,
> >                                            const DataLayout &TD) {
> > -  // Cannot handle reading from store of first-class aggregate yet.
> > -  if (DepSI->getValueOperand()->getType()->isStructTy() ||
> > -      DepSI->getValueOperand()->getType()->isArrayTy())
> > -    return -1;
> > -
> >    Value *StorePtr = DepSI->getPointerOperand();
> >    uint64_t StoreSize
> =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
> >    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
> > @@ -1097,6 +1092,25 @@ static Value *GetStoreValueForLoad(Value *SrcVal,
> unsigned Offset,
> >
> >    IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
> >
> > +  // When facing an Aggregate, we have to find the element to fetch
> from.
> > +  while (SrcVal->getType()->isAggregateType()) {
> > +    if (StructType *ST = dyn_cast<StructType>(SrcVal->getType())) {
> > +      const StructLayout &Layout = *TD.getStructLayout(ST);
> > +      unsigned i = 0;
> > +      for (unsigned cd = 0, e = ST->getNumElements(); cd != e; ++cd) {
> > +        if (Layout.getElementOffsetInBits(cd) > Offset) break;
> > +        i = cd;
> > +      }
> > +
> > +      SrcVal = Builder.CreateExtractValue(SrcVal, i);
> > +      Offset -= Layout.getElementOffsetInBits(i);
> > +    } else if (ArrayType *AT = dyn_cast<ArrayType>(SrcVal->getType())) {
> > +      uint64_t EltSize =
> TD.getTypeAllocSizeInBits(AT->getElementType());
> > +      SrcVal = Builder.CreateExtractValue(SrcVal, Offset / EltSize);
> > +      Offset %= EltSize;
> > +    }
> > +  }
> > +
> >    // Compute which bits of the stored value are being used by the load.
>  Convert
> >    // to an integer type to start with.
> >    if (SrcVal->getType()->getScalarType()->isPointerTy())
> > diff --git a/test/Transforms/GVN/aggregate_store.ll
> b/test/Transforms/GVN/aggregate_store.ll
> > new file mode 100644
> > index 0000000..fc8aab4
> > --- /dev/null
> > +++ b/test/Transforms/GVN/aggregate_store.ll
> > @@ -0,0 +1,36 @@
> > +; RUN: opt -gvn -S < %s | FileCheck %s
> > +
> > +target datalayout =
> >
> +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
> > +target triple = "x86_64-unknown-linux-gnu"
> > +
> > +%A__vtbl = type { i8*, i32 (%A*)* }
> > +%A = type { %A__vtbl* }
> > +
> > + at A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }
> > + at A__initZ = constant %A { %A__vtbl* @A__vtblZ }
> > +
> > +declare i8* @allocmemory(i64)
> > +
> > +define i32 @f1() {
> > +body:
> > +  %0 = tail call i8* @allocmemory(i64 8)
> > +  %1 = bitcast i8* %0 to %A*
> > +; CHECK: store %A
> > +  store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8
> > +  %2 = getelementptr inbounds %A* %1, i32 0, i32 0
> > +; CHECK-NOT: load %A__vtbl*
> > +  %vtbl.i = load %A__vtbl** %2, align 8
> > +  %3 = getelementptr inbounds %A__vtbl* %vtbl.i, i64 0, i32 1
> > +; CHECK-NOT: load i32 (%A*)*
> > +  %4 = load i32 (%A*)** %3, align 8
> > +; CHECK: tail call i32 @A.foo(%A* %1)
> > +  %5 = tail call i32 %4(%A* %1)
> > +  ret i32 %5
> > +}
> > +
> > +define i32 @A.foo(%A* nocapture %this) nounwind readnone {
> > +body:
> > +  ret i32 42
> > +}
> > +
> >
> >
> >
> > 2014-02-07 10:01 GMT-08:00 Adrian Prantl <aprantl at apple.com>:
> >
> > On Feb 6, 2014, at 22:52, deadal nix <deadalnix at gmail.com> wrote:
> > >
> > > I have some test cases, but I'm not sure how the LLVM test suite work
> and what is the right way to add them. I'll need some directions here to
> complete the diff.Anyway, the code :
> >
> > To test a change to GVN it would be best to have a .ll file with a RUN:
> line that invokes opt (with the appropriate flags for GVN) and then CHECKs
> the output with FileCheck. You can grep for "RUN: opt" in test/ for some
> examples.
> > Assuming that you have source code for a testcase, you can extract an
> appropriate fragment from the output go -mllvm -print-after-all.
> >
> > -- adrian
> >
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140211/e0332fdc/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Optimize-load-from-aggregate-stores-in-GVN.patch
Type: application/octet-stream
Size: 6139 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140211/e0332fdc/attachment.obj>