[llvm] r276841 - GVN-hoist: improve code generation for recursive GEPs

Sebastian Pop via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 26 22:48:12 PDT 2016


Author: spop
Date: Wed Jul 27 00:48:12 2016
New Revision: 276841

URL: http://llvm.org/viewvc/llvm-project?rev=276841&view=rev
Log:
GVN-hoist: improve code generation for recursive GEPs

When loading or storing a field of a struct like "a.b.c", GVN is able to
detect the equivalent expressions, but GVN-hoist would fail during code
generation.  This is because the GEPs are not hoisted as scalar operations, to
avoid moving the GEPs too far from their ld/st instruction when the ld/st is
not movable.  So we end up having to generate code for the GEP of a ld/st when we
move the ld/st.  In the case of a GEP referring to another GEP, as in "a.b.c",
we need to generate code for all the GEPs necessary to make all the operands
available at the new location for the ld/st.  With this patch we recursively
walk through the GEP operands, checking whether all operands are available, and
in the case of a GEP operand, we recursively make all of its operands
available.  Code generation happens from the innermost GEPs outward, until
reaching the GEP that appears as an operand of the ld/st.

Differential Revision: https://reviews.llvm.org/D22599

Added:
    llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp?rev=276841&r1=276840&r2=276841&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/GVNHoist.cpp Wed Jul 27 00:48:12 2016
@@ -186,7 +186,8 @@ class GVNHoist {
 public:
   GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md,
            bool OptForMinSize)
-      : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize), HoistedCtr(0) {}
+      : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize),
+        HoistingGeps(OptForMinSize), HoistedCtr(0) {}
   bool run(Function &F) {
     VN.setDomTree(DT);
     VN.setAliasAnalysis(AA);
@@ -230,6 +231,7 @@ private:
   AliasAnalysis *AA;
   MemoryDependenceResults *MD;
   const bool OptForMinSize;
+  const bool HoistingGeps;
   DenseMap<const Value *, unsigned> DFSNumber;
   BBSideEffectsSet BBSideEffects;
   MemorySSA *MSSA;
@@ -609,21 +611,86 @@ private:
     return true;
   }
 
-  bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
-                             const SmallVecInsn &InstructionsToHoist) const {
+  // Same as allOperandsAvailable with recursive check for GEP operands.
+  bool allGepOperandsAvailable(const Instruction *I,
+                               const BasicBlock *HoistPt) const {
+    for (const Use &Op : I->operands())
+      if (const auto *Inst = dyn_cast<Instruction>(&Op))
+        if (!DT->dominates(Inst->getParent(), HoistPt)) {
+          if (const GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Inst)) {
+            if (!allGepOperandsAvailable(GepOp, HoistPt))
+              return false;
+            // Gep is available if all operands of GepOp are available.
+          } else {
+            // Gep is not available if it has operands other than GEPs that are
+            // defined in blocks not dominating HoistPt.
+            return false;
+          }
+        }
+    return true;
+  }
+
+  // Make all operands of the GEP available.
+  void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+                         const SmallVecInsn &InstructionsToHoist,
+                         Instruction *Gep) const {
+    assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available");
+
+    Instruction *ClonedGep = Gep->clone();
+    for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
+      if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
+
+        // Check whether the operand is already available.
+        if (DT->dominates(Op->getParent(), HoistPt))
+          continue;
+
+        // As a GEP can refer to other GEPs, recursively make all the operands
+        // of this GEP available at HoistPt.
+        if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
+          makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
+      }
+
+    // Copy Gep and replace its uses in Repl with ClonedGep.
+    ClonedGep->insertBefore(HoistPt->getTerminator());
+
+    // Conservatively discard any optimization hints, they may differ on the
+    // other paths.
+    ClonedGep->dropUnknownNonDebugMetadata();
+
+    // If we have optimization hints which agree with each other along different
+    // paths, preserve them.
+    for (const Instruction *OtherInst : InstructionsToHoist) {
+      const GetElementPtrInst *OtherGep;
+      if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
+        OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
+      else
+        OtherGep = cast<GetElementPtrInst>(
+            cast<StoreInst>(OtherInst)->getPointerOperand());
+      ClonedGep->intersectOptionalDataWith(OtherGep);
+    }
+
+    // Replace uses of Gep with ClonedGep in Repl.
+    Repl->replaceUsesOfWith(Gep, ClonedGep);
+  }
+
+  // In the case Repl is a load or a store, we make all their GEPs
+  // available: GEPs are not hoisted by default to avoid the address
+  // computations to be hoisted without the associated load or store.
+  bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+                                const SmallVecInsn &InstructionsToHoist) const {
     // Check whether the GEP of a ld/st can be synthesized at HoistPt.
     GetElementPtrInst *Gep = nullptr;
     Instruction *Val = nullptr;
-    if (auto *Ld = dyn_cast<LoadInst>(Repl))
+    if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
       Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
-    if (auto *St = dyn_cast<StoreInst>(Repl)) {
+    } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
       Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
       Val = dyn_cast<Instruction>(St->getValueOperand());
       // Check that the stored value is available.
       if (Val) {
         if (isa<GetElementPtrInst>(Val)) {
           // Check whether we can compute the GEP at HoistPt.
-          if (!allOperandsAvailable(Val, HoistPt))
+          if (!allGepOperandsAvailable(Val, HoistPt))
             return false;
         } else if (!DT->dominates(Val->getParent(), HoistPt))
           return false;
@@ -631,40 +698,13 @@ private:
     }
 
     // Check whether we can compute the Gep at HoistPt.
-    if (!Gep || !allOperandsAvailable(Gep, HoistPt))
+    if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
       return false;
 
-    // Copy the gep before moving the ld/st.
-    Instruction *ClonedGep = Gep->clone();
-    ClonedGep->insertBefore(HoistPt->getTerminator());
-    // Conservatively discard any optimization hints, they may differ on the
-    // other paths.
-    for (Instruction *OtherInst : InstructionsToHoist) {
-      GetElementPtrInst *OtherGep;
-      if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
-        OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
-      else
-        OtherGep = cast<GetElementPtrInst>(
-            cast<StoreInst>(OtherInst)->getPointerOperand());
-      ClonedGep->intersectOptionalDataWith(OtherGep);
-      combineKnownMetadata(ClonedGep, OtherGep);
-    }
-    Repl->replaceUsesOfWith(Gep, ClonedGep);
+    makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
 
-    // Also copy Val when it is a GEP.
-    if (Val && isa<GetElementPtrInst>(Val)) {
-      Instruction *ClonedVal = Val->clone();
-      ClonedVal->insertBefore(HoistPt->getTerminator());
-      // Conservatively discard any optimization hints, they may differ on the
-      // other paths.
-      for (Instruction *OtherInst : InstructionsToHoist) {
-        auto *OtherVal =
-            cast<Instruction>(cast<StoreInst>(OtherInst)->getValueOperand());
-        ClonedVal->intersectOptionalDataWith(OtherVal);
-        combineKnownMetadata(ClonedVal, OtherVal);
-      }
-      Repl->replaceUsesOfWith(Val, ClonedVal);
-    }
+    if (Val && isa<GetElementPtrInst>(Val))
+      makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
 
     return true;
   }
@@ -695,14 +735,14 @@ private:
         Repl = InstructionsToHoist.front();
 
         // We can move Repl in HoistPt only when all operands are available.
+        // When not HoistingGeps we need to copy the GEPs now.
         // The order in which hoistings are done may influence the availability
         // of operands.
-        if (!allOperandsAvailable(Repl, HoistPt) &&
-            !makeOperandsAvailable(Repl, HoistPt, InstructionsToHoist))
+        if (!allOperandsAvailable(Repl, HoistPt) && !HoistingGeps &&
+            !makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist))
           continue;
+
         Repl->moveBefore(HoistPt->getTerminator());
-        // TBAA may differ on one of the other paths, we need to get rid of
-        // anything which might conflict.
       }
 
       if (isa<LoadInst>(Repl))
@@ -783,7 +823,7 @@ private:
               break;
           }
           CI.insert(Call, VN);
-        } else if (OptForMinSize || !isa<GetElementPtrInst>(&I1))
+        } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
           // Do not hoist scalars past calls that may write to memory because
           // that could result in spills later. geps are handled separately.
           // TODO: We can relax this for targets like AArch64 as they have more

Added: llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll?rev=276841&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll (added)
+++ llvm/trunk/test/Transforms/GVN/hoist-recursive-geps.ll Wed Jul 27 00:48:12 2016
@@ -0,0 +1,103 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+
+; Check that recursive GEPs are hoisted.
+; CHECK-LABEL: @fun
+; CHECK: fdiv
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: fsub
+; CHECK-NOT: fmul
+
+%0 = type { double, double, double }
+%1 = type { double, double, double }
+%2 = type { %3, %1, %1 }
+%3 = type { i32 (...)**, %4, %10*, %11, %11, %11, %11, %11, %11, %11, %11, %11 }
+%4 = type { %5 }
+%5 = type { %6 }
+%6 = type { %7 }
+%7 = type { %8 }
+%8 = type { %9 }
+%9 = type { i64, i64, i8* }
+%10 = type <{ i32 (...)**, i32, [4 x i8] }>
+%11 = type { [4 x [4 x double]] }
+%12 = type <{ %1, %0, i32, [4 x i8] }>
+%13 = type { %1, %0, %12, %3*, %14* }
+%14 = type opaque
+
+ at d = external global %0, align 8
+ at p = external global %1, align 8
+
+define zeroext i1 @fun(%2*, %12* dereferenceable(56), double*, %13*) {
+  %5 = alloca %2*, align 8
+  %6 = alloca %12*, align 8
+  %7 = alloca double*, align 8
+  %8 = alloca %13*, align 8
+  %9 = alloca double, align 8
+  %10 = alloca double, align 8
+  %11 = alloca double, align 8
+  %12 = alloca double, align 8
+  %13 = alloca double, align 8
+  %14 = alloca double, align 8
+  %15 = alloca double, align 8
+  store %2* %0, %2** %5, align 8
+  store %12* %1, %12** %6, align 8
+  store double* %2, double** %7, align 8
+  store %13* %3, %13** %8, align 8
+  %16 = load %2*, %2** %5, align 8
+  %17 = load double, double* getelementptr inbounds (%0, %0* @d, i32 0, i32 0), align 8
+  %18 = fdiv double 1.000000e+00, %17
+  store double %18, double* %15, align 8
+  %19 = load double, double* %15, align 8
+  %20 = fcmp oge double %19, 0.000000e+00
+  br i1 %20, label %21, label %36
+
+; <label>:21:                                     ; preds = %4
+  %22 = getelementptr inbounds %2, %2* %16, i32 0, i32 1
+  %23 = getelementptr inbounds %1, %1* %22, i32 0, i32 0
+  %24 = load double, double* %23, align 8
+  %25 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
+  %26 = fsub double %24, %25
+  %27 = load double, double* %15, align 8
+  %28 = fmul double %26, %27
+  store double %28, double* %9, align 8
+  %29 = getelementptr inbounds %2, %2* %16, i32 0, i32 2
+  %30 = getelementptr inbounds %1, %1* %29, i32 0, i32 0
+  %31 = load double, double* %30, align 8
+  %32 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
+  %33 = fsub double %31, %32
+  %34 = load double, double* %15, align 8
+  %35 = fmul double %33, %34
+  store double %35, double* %12, align 8
+  br label %51
+
+; <label>:36:                                     ; preds = %4
+  %37 = getelementptr inbounds %2, %2* %16, i32 0, i32 2
+  %38 = getelementptr inbounds %1, %1* %37, i32 0, i32 0
+  %39 = load double, double* %38, align 8
+  %40 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
+  %41 = fsub double %39, %40
+  %42 = load double, double* %15, align 8
+  %43 = fmul double %41, %42
+  store double %43, double* %9, align 8
+  %44 = getelementptr inbounds %2, %2* %16, i32 0, i32 1
+  %45 = getelementptr inbounds %1, %1* %44, i32 0, i32 0
+  %46 = load double, double* %45, align 8
+  %47 = load double, double* getelementptr inbounds (%1, %1* @p, i32 0, i32 0), align 8
+  %48 = fsub double %46, %47
+  %49 = load double, double* %15, align 8
+  %50 = fmul double %48, %49
+  store double %50, double* %12, align 8
+  br label %51
+
+; <label>:51:                                     ; preds = %36, %21
+  %52 = load double, double* %12, align 8
+  %53 = load double, double* %9, align 8
+  %54 = fcmp olt double %52, %53
+  ret i1 %54
+}
\ No newline at end of file




More information about the llvm-commits mailing list