[Patch] Allow GVN to optimize load from aggregate stores
Hal Finkel
hfinkel at anl.gov
Mon Feb 10 11:05:29 PST 2014
Hi,
As a general policy matter, please attach patches as attachments, instead of including the text inline.
cc'ing Daniel.
-Hal
----- Original Message -----
> From: "deadal nix" <deadalnix at gmail.com>
> To: llvm-commits at cs.uiuc.edu
> Sent: Monday, February 10, 2014 1:16:43 AM
> Subject: [Patch] Allow GVN to optimize load from aggregate stores
>
>
>
>
>
> Hey folks,
>
> I'm facing really bad optimizations with aggregate stores, and
> finally took the time to fix it. Here is the patch. It includes
> testing. make check-all seems happy with it, nothing looks broken.
>
> What is the right way to get that in?
>
>
>
>
> diff --git a/lib/Transforms/Scalar/GVN.cpp
> b/lib/Transforms/Scalar/GVN.cpp
> index 955ea1b..03d8275 100644
> --- a/lib/Transforms/Scalar/GVN.cpp
> +++ b/lib/Transforms/Scalar/GVN.cpp
> @@ -942,11 +942,6 @@ static int AnalyzeLoadFromClobberingWrite(Type
> *LoadTy, Value *LoadPtr,
> Value *WritePtr,
> uint64_t WriteSizeInBits,
> const DataLayout &TD) {
> - // If the loaded or stored value is a first class array or struct,
> don't try
> - // to transform them. We need to be able to bitcast to integer.
> - if (LoadTy->isStructTy() || LoadTy->isArrayTy())
> - return -1;
> -
> int64_t StoreOffset = 0, LoadOffset = 0;
> Value *StoreBase =
> GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD);
> Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr,
> LoadOffset, &TD);
> @@ -1015,11 +1010,6 @@ static int AnalyzeLoadFromClobberingWrite(Type
> *LoadTy, Value *LoadPtr,
> static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value
> *LoadPtr,
> StoreInst *DepSI,
> const DataLayout &TD) {
> - // Cannot handle reading from store of first-class aggregate yet.
> - if (DepSI->getValueOperand()->getType()->isStructTy() ||
> - DepSI->getValueOperand()->getType()->isArrayTy())
> - return -1;
> -
> Value *StorePtr = DepSI->getPointerOperand();
> uint64_t StoreSize
> =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
> return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
> @@ -1117,6 +1107,25 @@ static Value *GetStoreValueForLoad(Value
> *SrcVal, unsigned Offset,
>
> IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
>
> + // When facing an Aggregate, we have to find the element to fetch
> from.
> + while (SrcVal->getType()->isAggregateType()) {
> + if (StructType *ST = dyn_cast<StructType>(SrcVal->getType())) {
> + const StructLayout &Layout = *TD.getStructLayout(ST);
> + unsigned i = 0;
> + for (unsigned cd = 0, e = ST->getNumElements(); cd != e; ++cd) {
> + if (Layout.getElementOffset(cd) > Offset) break;
> + i = cd;
> + }
> +
> + SrcVal = Builder.CreateExtractValue(SrcVal, i);
> + Offset -= Layout.getElementOffset(i);
> + } else if (ArrayType *AT = dyn_cast<ArrayType>(SrcVal->getType()))
> {
> + uint64_t EltSize = TD.getTypeAllocSize(AT->getElementType());
> + SrcVal = Builder.CreateExtractValue(SrcVal, Offset / EltSize);
> + Offset %= EltSize;
> + }
> + }
> +
> // Compute which bits of the stored value are being used by the load.
> Convert
> // to an integer type to start with.
> if (SrcVal->getType()->getScalarType()->isPointerTy())
> @@ -1921,6 +1930,11 @@ bool GVN::processLoad(LoadInst *L) {
> if (TD) {
> StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
> L, *TD);
> + if (StoredVal == 0) {
> + // In case of aggregate store, we may have to fallback to
> GetStoreValueForLoad.
> + StoredVal = GetStoreValueForLoad(DepSI->getValueOperand(), 0,
> L->getType(), L, *TD);
> + }
> +
> if (StoredVal == 0)
> return false;
>
> diff --git a/test/Transforms/GVN/aggregate_store.ll
> b/test/Transforms/GVN/aggregate_store.ll
> new file mode 100644
> index 0000000..024aa08
> --- /dev/null
> +++ b/test/Transforms/GVN/aggregate_store.ll
> @@ -0,0 +1,78 @@
> +; RUN: opt -gvn -S < %s | FileCheck %s
> +
> +target datalayout =
> +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +%A__vtbl = type { i8*, i32 (%A*)* }
> +%B__vtbl = type { i8*, i32 (%B*)* }
> +%A = type { %A__vtbl* }
> +%B = type { %B__vtbl*, i32 }
> +
> + at A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }
> + at B__vtblZ = constant %B__vtbl { i8* null, i32 (%B*)* @B.foo }
> + at A__initZ = constant %A { %A__vtbl* @A__vtblZ }
> + at B__initZ = constant %B { %B__vtbl* @B__vtblZ, i32 42 }
> +
> +declare i8* @allocmemory(i64)
> +
> +define i32 @f1() {
> +body:
> + %0 = tail call i8* @allocmemory(i64 8)
> + %1 = bitcast i8* %0 to %A*
> +; CHECK: store %A
> + store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8
> + %2 = getelementptr inbounds %A* %1, i32 0, i32 0
> +; CHECK-NOT: load %A__vtbl*
> + %vtbl = load %A__vtbl** %2, align 8
> + %3 = getelementptr inbounds %A__vtbl* %vtbl, i64 0, i32 1
> +; CHECK-NOT: load i32 (%A*)*
> + %4 = load i32 (%A*)** %3, align 8
> +; CHECK: tail call i32 @A.foo(%A* %1)
> + %5 = tail call i32 %4(%A* %1)
> + ret i32 %5
> +}
> +
> +define i32 @f2() {
> +body:
> + %0 = tail call i8* @allocmemory(i64 16)
> + %1 = bitcast i8* %0 to %B*
> +; CHECK: store %B
> + store %B { %B__vtbl* @B__vtblZ, i32 42 }, %B* %1, align 8
> + %2 = bitcast i8* %0 to %A*
> + %3 = getelementptr inbounds %A* %2, i32 0, i32 0
> +; CHECK-NOT: load %A__vtbl*
> +; CHECK-NOT: load %B__vtbl*
> + %vtbl = load %A__vtbl** %3, align 8
> + %4 = getelementptr inbounds %A__vtbl* %vtbl, i64 0, i32 1
> + %5 = load i32 (%A*)** %4, align 8
> + %6 = tail call i32 %5(%A* %2)
> + ret i32 %6
> +}
> +
> +define i32 @f3() {
> +body:
> + %0 = tail call i8* @allocmemory(i64 16)
> + %1 = bitcast i8* %0 to %B*
> +; CHECK: store %B
> + store %B { %B__vtbl* @B__vtblZ, i32 42 }, %B* %1, align 8
> + %2 = getelementptr inbounds i8* %0, i64 8
> + %3 = bitcast i8* %2 to i32*
> +; CHECK-NOT: load i32*
> + %4 = load i32* %3, align 4
> +; CHECK: ret i32 42
> + ret i32 %4
> +}
> +
> +define i32 @A.foo(%A* nocapture %this) {
> +body:
> + ret i32 42
> +}
> +
> +define i32 @B.foo(%B* nocapture %this) {
> +body:
> + %0 = getelementptr inbounds %B* %this, i32 0, i32 1
> + %1 = load i32* %0
> + ret i32 %1
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory
More information about the llvm-commits
mailing list