<div dir="ltr"><div><div>Hey folks,<br><br></div>I'm facing really bad optimization results with aggregate stores, and finally took the time to fix it. Here is the patch. It includes testing. make check-all seems happy with it; nothing looks broken.<br>
<br></div>What is the right way to get that in?<br><div><div><div><br>diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp<br>index 955ea1b..03d8275 100644<br>--- a/lib/Transforms/Scalar/GVN.cpp<br>
+++ b/lib/Transforms/Scalar/GVN.cpp<br>@@ -942,11 +942,6 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,<br> Value *WritePtr,<br> uint64_t WriteSizeInBits,<br>
const DataLayout &TD) {<br>- // If the loaded or stored value is a first class array or struct, don't try<br>- // to transform them. We need to be able to bitcast to integer.<br>
- if (LoadTy->isStructTy() || LoadTy->isArrayTy())<br>- return -1;<br>-<br> int64_t StoreOffset = 0, LoadOffset = 0;<br> Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD);<br>
Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD);<br>@@ -1015,11 +1010,6 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,<br> static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,<br>
StoreInst *DepSI,<br> const DataLayout &TD) {<br>- // Cannot handle reading from store of first-class aggregate yet.<br>- if (DepSI->getValueOperand()->getType()->isStructTy() ||<br>
- DepSI->getValueOperand()->getType()->isArrayTy())<br>- return -1;<br>-<br> Value *StorePtr = DepSI->getPointerOperand();<br> uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());<br>
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,<br>@@ -1117,6 +1107,25 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,<br> <br> IRBuilder<> Builder(InsertPt->getParent(), InsertPt);<br>
<br>+ // When facing an Aggregate, we have to find the element to fetch from.<br>+ while (SrcVal->getType()->isAggregateType()) {<br>+ if (StructType *ST = dyn_cast<StructType>(SrcVal->getType())) {<br>
+ const StructLayout &Layout = *TD.getStructLayout(ST);<br>+ unsigned i = 0;<br>+ for (unsigned cd = 0, e = ST->getNumElements(); cd != e; ++cd) {<br>+ if (Layout.getElementOffset(cd) > Offset) break;<br>
+ i = cd;<br>+ }<br>+<br>+ SrcVal = Builder.CreateExtractValue(SrcVal, i);<br>+ Offset -= Layout.getElementOffset(i);<br>+ } else if (ArrayType *AT = dyn_cast<ArrayType>(SrcVal->getType())) {<br>
+ uint64_t EltSize = TD.getTypeAllocSize(AT->getElementType());<br>+ SrcVal = Builder.CreateExtractValue(SrcVal, Offset / EltSize);<br>+ Offset %= EltSize;<br>+ }<br>+ }<br>+<br> // Compute which bits of the stored value are being used by the load. Convert<br>
// to an integer type to start with.<br> if (SrcVal->getType()->getScalarType()->isPointerTy())<br>@@ -1921,6 +1930,11 @@ bool GVN::processLoad(LoadInst *L) {<br> if (TD) {<br> StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),<br>
L, *TD);<br>+ if (StoredVal == 0) {<br>+ // In case of aggregate store, we may have to fallback to GetStoreValueForLoad.<br>+ StoredVal = GetStoreValueForLoad(DepSI->getValueOperand(), 0, L->getType(), L, *TD);<br>
+ }<br>+<br> if (StoredVal == 0)<br> return false;<br> <br>diff --git a/test/Transforms/GVN/aggregate_store.ll b/test/Transforms/GVN/aggregate_store.ll<br>new file mode 100644<br>index 0000000..024aa08<br>
--- /dev/null<br>+++ b/test/Transforms/GVN/aggregate_store.ll<br>@@ -0,0 +1,78 @@<br>+; RUN: opt -gvn -S < %s | FileCheck %s<br>+<br>+target datalayout =<br>+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"<br>
+target triple = "x86_64-unknown-linux-gnu"<br>+<br>+%A__vtbl = type { i8*, i32 (%A*)* }<br>+%B__vtbl = type { i8*, i32 (%B*)* }<br>+%A = type { %A__vtbl* }<br>+%B = type { %B__vtbl*, i32 }<br>+<br>+@A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }<br>
+@B__vtblZ = constant %B__vtbl { i8* null, i32 (%B*)* @B.foo }<br>+@A__initZ = constant %A { %A__vtbl* @A__vtblZ }<br>+@B__initZ = constant %B { %B__vtbl* @B__vtblZ, i32 42 }<br>+<br>+declare i8* @allocmemory(i64)<br>+<br>
+define i32 @f1() {<br>+body:<br>+ %0 = tail call i8* @allocmemory(i64 8)<br>+ %1 = bitcast i8* %0 to %A*<br>+; CHECK: store %A<br>+ store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8<br>+ %2 = getelementptr inbounds %A* %1, i32 0, i32 0<br>
+; CHECK-NOT: load %A__vtbl*<br>+ %vtbl = load %A__vtbl** %2, align 8<br>+ %3 = getelementptr inbounds %A__vtbl* %vtbl, i64 0, i32 1<br>+; CHECK-NOT: load i32 (%A*)*<br>+ %4 = load i32 (%A*)** %3, align 8<br>+; CHECK: tail call i32 @A.foo(%A* %1)<br>
+ %5 = tail call i32 %4(%A* %1)<br>+ ret i32 %5<br>+}<br>+<br>+define i32 @f2() {<br>+body:<br>+ %0 = tail call i8* @allocmemory(i64 16)<br>+ %1 = bitcast i8* %0 to %B*<br>+; CHECK: store %B<br>+ store %B { %B__vtbl* @B__vtblZ, i32 42 }, %B* %1, align 8<br>
+ %2 = bitcast i8* %0 to %A*<br>+ %3 = getelementptr inbounds %A* %2, i32 0, i32 0<br>+; CHECK-NOT: load %A__vtbl*<br>+; CHECK-NOT: load %B__vtbl*<br>+ %vtbl = load %A__vtbl** %3, align 8<br>+ %4 = getelementptr inbounds %A__vtbl* %vtbl, i64 0, i32 1<br>
+ %5 = load i32 (%A*)** %4, align 8<br>+ %6 = tail call i32 %5(%A* %2)<br>+ ret i32 %6<br>+}<br>+<br>+define i32 @f3() {<br>+body:<br>+ %0 = tail call i8* @allocmemory(i64 16)<br>+ %1 = bitcast i8* %0 to %B*<br>+; CHECK: store %B<br>
+ store %B { %B__vtbl* @B__vtblZ, i32 42 }, %B* %1, align 8<br>+ %2 = getelementptr inbounds i8* %0, i64 8<br>+ %3 = bitcast i8* %2 to i32*<br>+; CHECK-NOT: load i32*<br>+ %4 = load i32* %3, align 4<br>+; CHECK: ret i32 42<br>
+ ret i32 %4<br>+}<br>+<br>+define i32 @A.foo(%A* nocapture %this) {<br>+body:<br>+ ret i32 42<br>+}<br>+<br>+define i32 @B.foo(%B* nocapture %this) {<br>+body:<br>+ %0 = getelementptr inbounds %B* %this, i32 0, i32 1<br>
+ %1 = load i32* %0<br>+ ret i32 %1<br>+}<br>+<br><br></div></div></div></div>