[llvm] r276841 - GVN-hoist: improve code generation for recursive GEPs
Sebastian Pop via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 26 22:48:12 PDT 2016
Author: spop
Date: Wed Jul 27 00:48:12 2016
New Revision: 276841
URL: http://llvm.org/viewvc/llvm-project?rev=276841&view=rev
Log:
GVN-hoist: improve code generation for recursive GEPs
When loading from or storing to a field of a struct such as "a.b.c", GVN
detects the equivalent expressions, but GVN-hoist would then fail during code
generation. This is because GEPs are not hoisted as scalar operations, to
avoid moving a GEP too far from its ld/st instruction when the ld/st itself is
not movable. So when we do move a ld/st, we have to generate code for its GEP
at the new location. When a GEP refers to another GEP, as in "a.b.c", we need
to generate code for all the GEPs required to make every operand available at
the new location of the ld/st. With this patch we recursively walk the GEP
operands, checking that each operand is available; when an operand is itself a
GEP, we recursively make its own operands available. Code generation proceeds
from the innermost GEPs outward, until reaching the GEP that appears as an
operand of the ld/st.
Differential Revision: https://reviews.llvm.org/D22599
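
To illustrate, a field access such as "a.b.c" can reach the IR as a chain of
GEPs in which the pointer operand of one GEP is itself another GEP, as in this
minimal hand-written sketch (types and names are hypothetical, not taken from
this commit):

%struct.B = type { i32, i32 }
%struct.A = type { %struct.B, i32 }

define i32 @read_abc(%struct.A* %a) {
  ; %c depends on %b, which is itself a GEP: hoisting the load requires
  ; cloning %b at the hoist point first, and only then %c.
  %b = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0
  %c = getelementptr inbounds %struct.B, %struct.B* %b, i32 0, i32 0
  %v = load i32, i32* %c, align 4
  ret i32 %v
}

Since %a is a function argument it is available everywhere, so %b can be
cloned at any hoist point, after which %c becomes available there as well;
this is the inner-GEPs-out order described above.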
Added:
llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp?rev=276841&r1=276840&r2=276841&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp Wed Jul 27 00:48:12 2016
@@ -186,7 +186,8 @@ class GVNHoist {
public:
GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md,
bool OptForMinSize)
- : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize), HoistedCtr(0) {}
+ : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize),
+ HoistingGeps(OptForMinSize), HoistedCtr(0) {}
bool run(Function &F) {
VN.setDomTree(DT);
VN.setAliasAnalysis(AA);
@@ -230,6 +231,7 @@ private:
AliasAnalysis *AA;
MemoryDependenceResults *MD;
const bool OptForMinSize;
+ const bool HoistingGeps;
DenseMap<const Value *, unsigned> DFSNumber;
BBSideEffectsSet BBSideEffects;
MemorySSA *MSSA;
@@ -609,21 +611,86 @@ private:
return true;
}
- bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
- const SmallVecInsn &InstructionsToHoist) const {
+ // Same as allOperandsAvailable with recursive check for GEP operands.
+ bool allGepOperandsAvailable(const Instruction *I,
+ const BasicBlock *HoistPt) const {
+ for (const Use &Op : I->operands())
+ if (const auto *Inst = dyn_cast<Instruction>(&Op))
+ if (!DT->dominates(Inst->getParent(), HoistPt)) {
+ if (const GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Inst)) {
+ if (!allGepOperandsAvailable(GepOp, HoistPt))
+ return false;
+ // Gep is available if all operands of GepOp are available.
+ } else {
+ // Gep is not available if it has operands other than GEPs that are
+ // defined in blocks not dominating HoistPt.
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Make all operands of the GEP available.
+ void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+ const SmallVecInsn &InstructionsToHoist,
+ Instruction *Gep) const {
+ assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available");
+
+ Instruction *ClonedGep = Gep->clone();
+ for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
+
+ // Check whether the operand is already available.
+ if (DT->dominates(Op->getParent(), HoistPt))
+ continue;
+
+ // As a GEP can refer to other GEPs, recursively make all the operands
+ // of this GEP available at HoistPt.
+ if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
+ makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
+ }
+
+ // Copy Gep and replace its uses in Repl with ClonedGep.
+ ClonedGep->insertBefore(HoistPt->getTerminator());
+
+ // Conservatively discard any optimization hints, they may differ on the
+ // other paths.
+ ClonedGep->dropUnknownNonDebugMetadata();
+
+ // If we have optimization hints which agree with each other along different
+ // paths, preserve them.
+ for (const Instruction *OtherInst : InstructionsToHoist) {
+ const GetElementPtrInst *OtherGep;
+ if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
+ OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
+ else
+ OtherGep = cast<GetElementPtrInst>(
+ cast<StoreInst>(OtherInst)->getPointerOperand());
+ ClonedGep->intersectOptionalDataWith(OtherGep);
+ }
+
+ // Replace uses of Gep with ClonedGep in Repl.
+ Repl->replaceUsesOfWith(Gep, ClonedGep);
+ }
+
+ // In the case Repl is a load or a store, we make all their GEPs
+ // available: GEPs are not hoisted by default to avoid the address
+ // computations to be hoisted without the associated load or store.
+ bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+ const SmallVecInsn &InstructionsToHoist) const {
// Check whether the GEP of a ld/st can be synthesized at HoistPt.
GetElementPtrInst *Gep = nullptr;
Instruction *Val = nullptr;
- if (auto *Ld = dyn_cast<LoadInst>(Repl))
+ if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
- if (auto *St = dyn_cast<StoreInst>(Repl)) {
+ } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
Val = dyn_cast<Instruction>(St->getValueOperand());
// Check that the stored value is available.
if (Val) {
if (isa<GetElementPtrInst>(Val)) {
// Check whether we can compute the GEP at HoistPt.
- if (!allOperandsAvailable(Val, HoistPt))
+ if (!allGepOperandsAvailable(Val, HoistPt))
return false;
} else if (!DT->dominates(Val->getParent(), HoistPt))
return false;
@@ -631,40 +698,13 @@ private:
}
// Check whether we can compute the Gep at HoistPt.
- if (!Gep || !allOperandsAvailable(Gep, HoistPt))
+ if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
return false;
- // Copy the gep before moving the ld/st.
- Instruction *ClonedGep = Gep->clone();
- ClonedGep->insertBefore(HoistPt->getTerminator());
- // Conservatively discard any optimization hints, they may differ on the
- // other paths.
- for (Instruction *OtherInst : InstructionsToHoist) {
- GetElementPtrInst *OtherGep;
- if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
- OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
- else
- OtherGep = cast<GetElementPtrInst>(
- cast<StoreInst>(OtherInst)->getPointerOperand());
- ClonedGep->intersectOptionalDataWith(OtherGep);
- combineKnownMetadata(ClonedGep, OtherGep);
- }
- Repl->replaceUsesOfWith(Gep, ClonedGep);
+ makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
- // Also copy Val when it is a GEP.
- if (Val && isa<GetElementPtrInst>(Val)) {
- Instruction *ClonedVal = Val->clone();
- ClonedVal->insertBefore(HoistPt->getTerminator());
- // Conservatively discard any optimization hints, they may differ on the
- // other paths.
- for (Instruction *OtherInst : InstructionsToHoist) {
- auto *OtherVal =
- cast<Instruction>(cast<StoreInst>(OtherInst)->getValueOperand());
- ClonedVal->intersectOptionalDataWith(OtherVal);
- combineKnownMetadata(ClonedVal, OtherVal);
- }
- Repl->replaceUsesOfWith(Val, ClonedVal);
- }
+ if (Val && isa<GetElementPtrInst>(Val))
+ makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
return true;
}
@@ -695,14 +735,14 @@ private:
Repl = InstructionsToHoist.front();
// We can move Repl in HoistPt only when all operands are available.
+ // When not HoistingGeps we need to copy the GEPs now.
// The order in which hoistings are done may influence the availability
// of operands.
- if (!allOperandsAvailable(Repl, HoistPt) &&
- !makeOperandsAvailable(Repl, HoistPt, InstructionsToHoist))
+ if (!allOperandsAvailable(Repl, HoistPt) && !HoistingGeps &&
+ !makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist))
continue;
+
Repl->moveBefore(HoistPt->getTerminator());
- // TBAA may differ on one of the other paths, we need to get rid of
- // anything which might conflict.
}
if (isa<LoadInst>(Repl))
@@ -783,7 +823,7 @@ private:
break;
}
CI.insert(Call, VN);
- } else if (OptForMinSize || !isa<GetElementPtrInst>(&I1))
+ } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
// Do not hoist scalars past calls that may write to memory because
// that could result in spills later. geps are handled separately.
// TODO: We can relax this for targets like AArch64 as they have more
Added: llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll?rev=276841&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll (added)
+++ llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll Wed Jul 27 00:48:12 2016
@@ -0,0 +1,103 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+
+; Check that recursive GEPs are hoisted.
+; CHECK-LABEL: @fun
+; CHECK: fdiv
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: fsub
+; CHECK-NOT: fmul
+
+%0 = type { double, double, double }
+%1 = type { double, double, double }
+%2 = type { %3, %1, %1 }
+%3 = type { i32 (...)**, %4, %10*, %11, %11, %11, %11, %11, %11, %11, %11, %11 }
+%4 = type { %5 }
+%5 = type { %6 }
+%6 = type { %7 }
+%7 = type { %8 }
+%8 = type { %9 }
+%9 = type { i64, i64, i8* }
+%10 = type <{ i32 (...)**, i32, [4 x i8] }>
+%11 = type { [4 x [4 x double]] }
+%12 = type <{ %1, %0, i32, [4 x i8] }>
+%13 = type { %1, %0, %12, %3*, %14* }
+%14 = type opaque
+
+@d = external global %0, align 8
+@p = external global %1, align 8
+
+define zeroext i1 @fun(%2*, %12* dereferenceable(56), double*, %13*) {
+ %5 = alloca %2*, align 8
+ %6 = alloca %12*, align 8
+ %7 = alloca double*, align 8
+ %8 = alloca %13*, align 8
+ %9 = alloca double, align 8
+ %10 = alloca double, align 8
+ %11 = alloca double, align 8
+ %12 = alloca double, align 8
+ %13 = alloca double, align 8
+ %14 = alloca double, align 8
+ %15 = alloca double, align 8
+ store %2* %0, %2** %5, align 8
+ store %12* %1, %12** %6, align 8
+ store double* %2, double** %7, align 8
+ store %13* %3, %13** %8, align 8
+ %16 = load %2*, %2** %5, align 8
+ %17 = load double, double* getelementptr inbounds (%0, %0* @d, i32 0, i32 0), align 8
+ %18 = fdiv double 1.000000e+00, %17
+ store double %18, double* %15, align 8
+ %19 = load double, double* %15, align 8
+ %20 = fcmp oge double %19, 0.000000e+00
+ br i1 %20, label %21, label %36
+
+; <label>:21: ; preds = %4
+ %22 = getelementptr inbounds %2, %2* %16, i32 0, i32 1
+ %23 = getelementptr inbounds %1, %1* %22, i32 0, i32 0
+ %24 = load double, double* %23, align 8
+ %25 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
+ %26 = fsub double %24, %25
+ %27 = load double, double* %15, align 8
+ %28 = fmul double %26, %27
+ store double %28, double* %9, align 8
+ %29 = getelementptr inbounds %2, %2* %16, i32 0, i32 2
+ %30 = getelementptr inbounds %1, %1* %29, i32 0, i32 0
+ %31 = load double, double* %30, align 8
+ %32 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
+ %33 = fsub double %31, %32
+ %34 = load double, double* %15, align 8
+ %35 = fmul double %33, %34
+ store double %35, double* %12, align 8
+ br label %51
+
+; <label>:36: ; preds = %4
+ %37 = getelementptr inbounds %2, %2* %16, i32 0, i32 2
+ %38 = getelementptr inbounds %1, %1* %37, i32 0, i32 0
+ %39 = load double, double* %38, align 8
+ %40 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
+ %41 = fsub double %39, %40
+ %42 = load double, double* %15, align 8
+ %43 = fmul double %41, %42
+ store double %43, double* %9, align 8
+ %44 = getelementptr inbounds %2, %2* %16, i32 0, i32 1
+ %45 = getelementptr inbounds %1, %1* %44, i32 0, i32 0
+ %46 = load double, double* %45, align 8
+ %47 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
+ %48 = fsub double %46, %47
+ %49 = load double, double* %15, align 8
+ %50 = fmul double %48, %49
+ store double %50, double* %12, align 8
+ br label %51
+
+; <label>:51: ; preds = %36, %21
+ %52 = load double, double* %12, align 8
+ %53 = load double, double* %9, align 8
+ %54 = fcmp olt double %52, %53
+ ret i1 %54
+}
\ No newline at end of file
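
For reference, a rough sketch of what the hoisted code looks like in the block
before the branch, as constrained by the CHECK lines above (the value names
are illustrative, not actual pass output):

  %g1 = getelementptr inbounds %2, %2* %16, i32 0, i32 1
  %f1 = getelementptr inbounds %1, %1* %g1, i32 0, i32 0
  %g2 = getelementptr inbounds %2, %2* %16, i32 0, i32 2
  %f2 = getelementptr inbounds %1, %1* %g2, i32 0, i32 0
  %a = load double, double* %f1, align 8
  %b = load double, double* %f2, align 8
  %c = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
  %d = load double, double* %15, align 8
  %s1 = fsub double %a, %c
  %m1 = fmul double %s1, %d
  %s2 = fsub double %b, %c
  %m2 = fmul double %s2, %d

Both GEP chains, the loads they feed, and the fsub/fmul pairs are hoisted out
of the two arms, which keep only their stores.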