[Patch] Allow GVN to optimize load from aggregate stores
deadal nix
deadalnix at gmail.com
Tue Feb 11 14:25:10 PST 2014
Here is the patch as a file.
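To summarize what the patch does before the quoted diff: when a load is clobbered by a store of a first-class struct or array, GetStoreValueForLoad now first descends into the stored aggregate, using the struct layout (or the array element size) to pick the element that covers the load's byte offset, and only then applies the usual bit-level coercion. Below is a small standalone C++ sketch of that offset walk; it is not LLVM code, the helper name is made up, and the vector of offsets simply stands in for what StructLayout::getElementOffset returns:

// Standalone sketch (not part of the patch, plain C++ rather than LLVM code)
// of the descent performed by the new while-loop in GetStoreValueForLoad.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Given the byte offsets of a struct's elements and a byte offset into the
// struct, return the index of the element containing that offset and rebase
// the offset so it is relative to the chosen element.
static unsigned descendIntoStruct(const std::vector<uint64_t> &ElementOffsets,
                                  uint64_t &Offset) {
  assert(!ElementOffsets.empty() && "struct must have at least one element");
  unsigned Index = 0;
  for (unsigned I = 0, E = ElementOffsets.size(); I != E; ++I) {
    if (ElementOffsets[I] > Offset)
      break;        // first element that starts past the offset: stop
    Index = I;      // last element that starts at or before the offset
  }
  Offset -= ElementOffsets[Index];
  return Index;
}

int main() {
  // %B = type { %B__vtbl*, i32 } from the test: on x86-64 the element
  // offsets are 0 and 8, so a load at byte offset 8 (as in @f3) resolves
  // to element 1 with a remaining offset of 0.
  std::vector<uint64_t> BOffsets = {0, 8};
  uint64_t Offset = 8;
  unsigned Elt = descendIntoStruct(BOffsets, Offset);
  std::printf("element %u, remaining offset %llu\n", Elt,
              (unsigned long long)Offset);
  return 0;
}

For arrays the corresponding step is just Offset / EltSize for the element index and Offset % EltSize for the rebased offset, which is what the ArrayType branch in the patch does.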
2014-02-10 11:05 GMT-08:00 Hal Finkel <hfinkel at anl.gov>:
> Hi,
>
> As a general policy matter, please attach patches as attachments, instead
> of including the text inline.
>
> cc'ing Daniel.
>
> -Hal
>
> ----- Original Message -----
> > From: "deadal nix" <deadalnix at gmail.com>
> > To: llvm-commits at cs.uiuc.edu
> > Sent: Monday, February 10, 2014 1:16:43 AM
> > Subject: [Patch] Allow GVN to optimize load from aggregate stores
> >
> > Hey folks,
> >
> > I'm running into really poor optimization of aggregate stores, and
> > finally took the time to fix it. Here is the patch; it includes
> > tests. make check-all seems happy with it, and nothing looks broken.
> >
> > What is the right way to get this in?
> >
> > diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
> > index 955ea1b..03d8275 100644
> > --- a/lib/Transforms/Scalar/GVN.cpp
> > +++ b/lib/Transforms/Scalar/GVN.cpp
> > @@ -942,11 +942,6 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
> >                                            Value *WritePtr,
> >                                            uint64_t WriteSizeInBits,
> >                                            const DataLayout &TD) {
> > -  // If the loaded or stored value is a first class array or struct, don't try
> > -  // to transform them.  We need to be able to bitcast to integer.
> > -  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
> > -    return -1;
> > -
> >    int64_t StoreOffset = 0, LoadOffset = 0;
> >    Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, &TD);
> >    Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD);
> > @@ -1015,11 +1010,6 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
> >  static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
> >                                            StoreInst *DepSI,
> >                                            const DataLayout &TD) {
> > -  // Cannot handle reading from store of first-class aggregate yet.
> > -  if (DepSI->getValueOperand()->getType()->isStructTy() ||
> > -      DepSI->getValueOperand()->getType()->isArrayTy())
> > -    return -1;
> > -
> >    Value *StorePtr = DepSI->getPointerOperand();
> >    uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
> >    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
> > @@ -1117,6 +1107,25 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
> >  
> >    IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
> >  
> > +  // When facing an Aggregate, we have to find the element to fetch from.
> > +  while (SrcVal->getType()->isAggregateType()) {
> > +    if (StructType *ST = dyn_cast<StructType>(SrcVal->getType())) {
> > +      const StructLayout &Layout = *TD.getStructLayout(ST);
> > +      unsigned i = 0;
> > +      for (unsigned cd = 0, e = ST->getNumElements(); cd != e; ++cd) {
> > +        if (Layout.getElementOffset(cd) > Offset) break;
> > +        i = cd;
> > +      }
> > +
> > +      SrcVal = Builder.CreateExtractValue(SrcVal, i);
> > +      Offset -= Layout.getElementOffset(i);
> > +    } else if (ArrayType *AT = dyn_cast<ArrayType>(SrcVal->getType())) {
> > +      uint64_t EltSize = TD.getTypeAllocSize(AT->getElementType());
> > +      SrcVal = Builder.CreateExtractValue(SrcVal, Offset / EltSize);
> > +      Offset %= EltSize;
> > +    }
> > +  }
> > +
> >    // Compute which bits of the stored value are being used by the load.  Convert
> >    // to an integer type to start with.
> >    if (SrcVal->getType()->getScalarType()->isPointerTy())
> > @@ -1921,6 +1930,11 @@ bool GVN::processLoad(LoadInst *L) {
> >        if (TD) {
> >          StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
> >                                                     L, *TD);
> > +        if (StoredVal == 0) {
> > +          // In case of aggregate store, we may have to fallback to GetStoreValueForLoad.
> > +          StoredVal = GetStoreValueForLoad(DepSI->getValueOperand(), 0, L->getType(), L, *TD);
> > +        }
> > +
> >          if (StoredVal == 0)
> >            return false;
> >
> > diff --git a/test/Transforms/GVN/aggregate_store.ll b/test/Transforms/GVN/aggregate_store.ll
> > new file mode 100644
> > index 0000000..024aa08
> > --- /dev/null
> > +++ b/test/Transforms/GVN/aggregate_store.ll
> > @@ -0,0 +1,78 @@
> > +; RUN: opt -gvn -S < %s | FileCheck %s
> > +
> > +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
> > +target triple = "x86_64-unknown-linux-gnu"
> > +
> > +%A__vtbl = type { i8*, i32 (%A*)* }
> > +%B__vtbl = type { i8*, i32 (%B*)* }
> > +%A = type { %A__vtbl* }
> > +%B = type { %B__vtbl*, i32 }
> > +
> > +@A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }
> > +@B__vtblZ = constant %B__vtbl { i8* null, i32 (%B*)* @B.foo }
> > +@A__initZ = constant %A { %A__vtbl* @A__vtblZ }
> > +@B__initZ = constant %B { %B__vtbl* @B__vtblZ, i32 42 }
> > +
> > +declare i8* @allocmemory(i64)
> > +
> > +define i32 @f1() {
> > +body:
> > + %0 = tail call i8* @allocmemory(i64 8)
> > + %1 = bitcast i8* %0 to %A*
> > +; CHECK: store %A
> > + store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8
> > + %2 = getelementptr inbounds %A* %1, i32 0, i32 0
> > +; CHECK-NOT: load %A__vtbl*
> > + %vtbl = load %A__vtbl** %2, align 8
> > + %3 = getelementptr inbounds %A__vtbl* %vtbl, i64 0, i32 1
> > +; CHECK-NOT: load i32 (%A*)*
> > + %4 = load i32 (%A*)** %3, align 8
> > +; CHECK: tail call i32 @A.foo(%A* %1)
> > + %5 = tail call i32 %4(%A* %1)
> > + ret i32 %5
> > +}
> > +
> > +define i32 @f2() {
> > +body:
> > + %0 = tail call i8* @allocmemory(i64 16)
> > + %1 = bitcast i8* %0 to %B*
> > +; CHECK: store %B
> > + store %B { %B__vtbl* @B__vtblZ, i32 42 }, %B* %1, align 8
> > + %2 = bitcast i8* %0 to %A*
> > + %3 = getelementptr inbounds %A* %2, i32 0, i32 0
> > +; CHECK-NOT: load %A__vtbl*
> > +; CHECK-NOT: load %B__vtbl*
> > + %vtbl = load %A__vtbl** %3, align 8
> > + %4 = getelementptr inbounds %A__vtbl* %vtbl, i64 0, i32 1
> > + %5 = load i32 (%A*)** %4, align 8
> > + %6 = tail call i32 %5(%A* %2)
> > + ret i32 %6
> > +}
> > +
> > +define i32 @f3() {
> > +body:
> > + %0 = tail call i8* @allocmemory(i64 16)
> > + %1 = bitcast i8* %0 to %B*
> > +; CHECK: store %B
> > + store %B { %B__vtbl* @B__vtblZ, i32 42 }, %B* %1, align 8
> > + %2 = getelementptr inbounds i8* %0, i64 8
> > + %3 = bitcast i8* %2 to i32*
> > +; CHECK-NOT: load i32*
> > + %4 = load i32* %3, align 4
> > +; CHECK: ret i32 42
> > + ret i32 %4
> > +}
> > +
> > +define i32 @A.foo(%A* nocapture %this) {
> > +body:
> > + ret i32 42
> > +}
> > +
> > +define i32 @B.foo(%B* nocapture %this) {
> > +body:
> > + %0 = getelementptr inbounds %B* %this, i32 0, i32 1
> > + %1 = load i32* %0
> > + ret i32 %1
> > +}
> > +
> >
> >
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> >
>
> --
> Hal Finkel
> Assistant Computational Scientist
> Leadership Computing Facility
> Argonne National Laboratory
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Optimize-load-from-aggregate-stores-in-GVN.patch
Type: application/octet-stream
Size: 6139 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140211/097e9ca4/attachment.obj>